1 Carga de datos

1.1 Metadatos de muestras

1.2 Resumen

cat("Muestras totales:", n_distinct(Tabla_inicial$dataset), "\n\n")
## Muestras totales: 91
Tabla_inicial %>%
  distinct(dataset, batch, Tipo_label) %>%
  count(batch, Tipo_label) %>%
  pivot_wider(names_from = Tipo_label, values_from = n, values_fill = 0) %>%
  knitr::kable(caption = "Muestras por lote y tipo de paciente")
Muestras por lote y tipo de paciente
batch Entrantes (T0) Estancia (T1)
LIB1 12 12
LIB6 10 11
RUN6 11 12
RUN7 12 11
Tabla_inicial %>%
  count(database, batch) %>%
  pivot_wider(names_from = batch, values_from = n, values_fill = 0) %>%
  knitr::kable(caption = "Hits filtrados por base de datos y lote")
Hits filtrados por base de datos y lote
database LIB1 LIB6 RUN6 RUN7
ABR 2269 1904 2027 1804
BAC 9226 10118 11794 9287
REL 3128 3198 3489 3081

2 Índices de diversidad

2.1 Shapiro test

diver_long %>%
  filter(!is.na(Tipo_label), !is.na(value)) %>%
  group_by(database2, index, Tipo_label) %>%
  filter(n() >= 3) %>%
  do(tidy(shapiro.test(.$value))) %>%
  datatable(rownames = FALSE, filter = "top",
            options = list(pageLength = 20, scrollX = TRUE))

2.2 Kruskal-Wallis (T0 vs T1)

diver_long %>%
  filter(!is.na(Tipo_label), !is.na(value)) %>%
  group_by(database2, index) %>%
  filter(n_distinct(Tipo_label) > 1) %>%
  do(tidy(kruskal.test(.$value ~ .$Tipo_label))) %>%
  datatable(rownames = FALSE, filter = "top",
            options = list(pageLength = 20, scrollX = TRUE))

2.3 Shannon

diver_long %>%
  filter(index == "diversity_shannon", !is.na(Tipo_label)) %>%
  ggplot(aes(x = Tipo_label, y = value, fill = Tipo_label)) +
  geom_violin() + geom_boxplot(width = 0.2) +
  facet_wrap(~database2, scales = "free_y") +
  scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
                               "Estancia (T1)"  = "#E64B35")) +
  labs(title = "Shannon diversity", x = NULL, y = "Shannon Index") +
  theme_classic() +
  theme(legend.position  = "none", strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))

grouped_ggbetweenstats(
  data         = diver_long %>%
    filter(index == "diversity_shannon", !is.na(Tipo_label),
           !database2 %in% c("gral","BAC")),
  x            = Tipo_label,
  y            = value,
  type         = "np",
  grouping.var = database2,
  ggplot.component = ggplot2::theme(
    legend.position  = "none", strip.background = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1))
)

2.4 Chao1

diver_long %>%
  filter(index == "chao1", !is.na(Tipo_label)) %>%
  ggplot(aes(x = Tipo_label, y = value, fill = Tipo_label)) +
  geom_violin() + geom_boxplot(width = 0.2) +
  facet_wrap(~database2, scales = "free_y") +
  scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
                               "Estancia (T1)"  = "#E64B35")) +
  labs(title = "Chao1 index", x = NULL, y = "Chao1") +
  theme_classic() +
  theme(legend.position  = "none", strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))

grouped_ggbetweenstats(
  data         = diver_long %>%
    filter(index == "chao1", !is.na(Tipo_label),
           !database2 %in% c("gral","BAC")),
  x            = Tipo_label, y = value, type = "np",
  grouping.var = database2
)

2.5 Dominance Simpson

diver_long %>%
  filter(index == "dominance_simpson", !is.na(Tipo_label)) %>%
  ggplot(aes(x = Tipo_label, y = value, fill = Tipo_label)) +
  geom_violin() + geom_boxplot(width = 0.2) +
  facet_wrap(~database2, scales = "free_y") +
  scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
                               "Estancia (T1)"  = "#E64B35")) +
  labs(title = "Dominance Simpson", x = NULL, y = "Simpson") +
  theme_classic() +
  theme(legend.position  = "none", strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))

grouped_ggbetweenstats(
  data         = diver_long %>%
    filter(index == "dominance_simpson", !is.na(Tipo_label),
           !database2 %in% c("gral","BAC")),
  x            = Tipo_label, y = value, type = "np",
  grouping.var = database2
)

2.6 Diversidad por lote

diver_long %>%
  filter(index == "diversity_shannon", !is.na(batch)) %>%
  ggplot(aes(x = batch, y = value, fill = batch)) +
  geom_violin() + geom_boxplot(width = 0.2) +
  facet_wrap(~database2, scales = "free_y") +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Shannon por lote de secuenciación", x = NULL, y = "Shannon Index") +
  theme_classic() +
  theme(legend.position  = "none", strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))

3 Rarefacción

3.1 Shannon rarefied (T0 vs T1)

rare_div %>%
  filter(!is.na(Tipo_label)) %>%
  ggplot(aes(x = Tipo_label, y = shannon_rare, fill = Tipo_label)) +
  geom_boxplot(outlier.size = 1) +
  geom_jitter(width = 0.1, alpha = 0.5) +
  scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
                               "Estancia (T1)"  = "#E64B35")) +
  labs(title = paste0("Shannon rarefied (depth = ", min_depth, " reads)"),
       y = "Shannon index", x = NULL) +
  theme_classic() + theme(legend.position = "none")

3.2 Riqueza rarefied (T0 vs T1)

rare_div %>%
  filter(!is.na(Tipo_label)) %>%
  ggplot(aes(x = Tipo_label, y = richness_rare, fill = Tipo_label)) +
  geom_boxplot(outlier.size = 1) +
  geom_jitter(width = 0.1, alpha = 0.5) +
  scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
                               "Estancia (T1)"  = "#E64B35")) +
  labs(title = paste0("ARG richness rarefied (depth = ", min_depth, ")"),
       y = "N ARGs", x = NULL) +
  theme_classic() + theme(legend.position = "none")

3.3 Riqueza por lote y fecha

rare_div %>%
  ggplot(aes(x = pool_id, y = richness_rare, color = Tipo_label, group = Tipo_label)) +
  geom_line() + geom_point(size = 2) +
  facet_wrap(~batch, scales = "free_x") +
  scale_color_manual(values = c("Entrantes (T0)" = "#4DBBD5",
                                "Estancia (T1)"  = "#E64B35")) +
  labs(title = "Riqueza rarefied por semana/pool y lote",
       y = "N ARGs", x = "Pool ID", color = NULL) +
  theme_classic() +
  theme(strip.background  = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 7))

4 Core / Shell / Rare resistome

4.1 Visión general

arg_prevalence %>%
  count(resistome_class) %>%
  mutate(resistome_class = factor(resistome_class, levels = c("Core","Shell","Rare"))) %>%
  ggplot(aes(x = resistome_class, y = n, fill = resistome_class)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = n), vjust = -0.4, size = 4) +
  scale_fill_manual(values = c("Core"="#E64B35","Shell"="#4DBBD5","Rare"="#B0B0B0")) +
  labs(title = "Clasificación del resistoma ABR",
       subtitle = paste0("Core ≥90% | Shell 15–89% | Rare <15% (n=", n_samples, " muestras)"),
       x = NULL, y = "N templates ARG") +
  theme_classic() + theme(legend.position = "none")

4.2 Clases de antibiótico — Core

arg_prevalence %>%
  filter(resistome_class == "Core") %>%
  count(Class) %>%
  arrange(desc(n)) %>%
  ggplot(aes(x = reorder(Class, n), y = n, fill = n)) +
  geom_col() + coord_flip() +
  scale_fill_viridis_c(option = "C", direction = -1) +
  labs(title = "Clases en el Core resistome", x = NULL, y = "N templates") +
  theme_classic() + theme(legend.position = "none")

4.3 Distribución de prevalencia

arg_prevalence %>%
  ggplot(aes(x = prevalence_pct, fill = resistome_class)) +
  geom_histogram(binwidth = 5, color = "white") +
  geom_vline(xintercept = c(15, 90), linetype = "dashed", color = "grey40") +
  scale_fill_manual(values = c("Core"="#E64B35","Shell"="#4DBBD5","Rare"="#B0B0B0")) +
  labs(title = "Distribución de prevalencia de ARGs",
       x = "Prevalencia (% muestras)", y = "N templates", fill = NULL) +
  theme_classic()

4.4 Tabla Core

arg_prevalence %>%
  filter(resistome_class == "Core") %>%
  arrange(desc(prevalence_pct)) %>%
  select(template, Class, prevalence_pct, n_samples_detected, mean_depth) %>%
  mutate(across(where(is.numeric), ~round(., 1))) %>%
  datatable(rownames = FALSE, filter = "top",
            options = list(pageLength = 20, scrollX = TRUE))

5 PCA

5.1 PCA Global por Tipo

pca_df(PCA) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
  ggplot(aes(PC1, PC2, color = Tipo_label)) +
  geom_point(size = 3) +
  scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
                     na.value = "grey60") +
  labs(title = "PCA Global", x = paste0("PC1 ", pct(PCA)[1], "%"),
       y = paste0("PC2 ", pct(PCA)[2], "%"), color = NULL) +
  theme_classic()

5.2 PCA Global por lote

pca_df(PCA) %>% select(dataset, PC1, PC2, batch) %>% distinct() %>%
  ggplot(aes(PC1, PC2, color = batch)) +
  geom_point(size = 3) + scale_color_brewer(palette = "Set1") +
  labs(title = "PCA Global (por lote)", x = paste0("PC1 ", pct(PCA)[1], "%"),
       y = paste0("PC2 ", pct(PCA)[2], "%"), color = "Lote") +
  theme_classic()

5.3 PCA ABR

pca_df(PCA_ABR) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
  ggplot(aes(PC1, PC2, color = Tipo_label)) +
  geom_point(size = 3) +
  scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
                     na.value = "grey60") +
  labs(title = "PCA ARGs", x = paste0("PC1 ", pct(PCA_ABR)[1], "%"),
       y = paste0("PC2 ", pct(PCA_ABR)[2], "%"), color = NULL) +
  theme_classic()

5.4 PCA BAC

pca_df(PCA_BAC) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
  ggplot(aes(PC1, PC2, color = Tipo_label)) +
  geom_point(size = 3) +
  scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
                     na.value = "grey60") +
  labs(title = "PCA BacMet", x = paste0("PC1 ", pct(PCA_BAC)[1], "%"),
       y = paste0("PC2 ", pct(PCA_BAC)[2], "%"), color = NULL) +
  theme_classic()

5.5 PCA REL

pca_df(PCA_REL) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
  ggplot(aes(PC1, PC2, color = Tipo_label)) +
  geom_point(size = 3) +
  scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
                     na.value = "grey60") +
  labs(title = "PCA Relaxasas", x = paste0("PC1 ", pct(PCA_REL)[1], "%"),
       y = paste0("PC2 ", pct(PCA_REL)[2], "%"), color = NULL) +
  theme_classic()

5.6 PCA BAC Metals

pca_df(PCA_BAC_Metal) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
  ggplot(aes(PC1, PC2, color = Tipo_label)) +
  geom_point(size = 3) +
  scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
                     na.value = "grey60") +
  labs(title = "PCA BAC Metales", x = paste0("PC1 ", pct(PCA_BAC_Metal)[1], "%"),
       y = paste0("PC2 ", pct(PCA_BAC_Metal)[2], "%"), color = NULL) +
  theme_classic()

5.7 PCA BAC Biocide

pca_df(PCA_BAC_Biocide) %>% select(dataset, PC1, PC2, Tipo_label) %>% distinct() %>%
  ggplot(aes(PC1, PC2, color = Tipo_label)) +
  geom_point(size = 3) +
  scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
                     na.value = "grey60") +
  labs(title = "PCA BAC Biocides", x = paste0("PC1 ", pct(PCA_BAC_Biocide)[1], "%"),
       y = paste0("PC2 ", pct(PCA_BAC_Biocide)[2], "%"), color = NULL) +
  theme_classic()

6 UMAP

6.1 UMAP Global

umap_plot %>% select(dataset, V1, V2, Tipo_label) %>% distinct() %>%
  ggplot(aes(V1, V2, color = Tipo_label)) + geom_point(size = 3) +
  scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
                     na.value = "grey60") +
  labs(title = "UMAP General", color = NULL) + theme_classic()

6.2 UMAP ABR

umap_plot_ABR %>% select(dataset, V1, V2, Tipo_label) %>% distinct() %>%
  ggplot(aes(V1, V2, color = Tipo_label)) + geom_point(size = 3) +
  scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
                     na.value = "grey60") +
  labs(title = "UMAP ARG", color = NULL) + theme_classic()

6.3 UMAP BAC

umap_plot_BAC %>% select(dataset, V1, V2, Tipo_label) %>% distinct() %>%
  ggplot(aes(V1, V2, color = Tipo_label)) + geom_point(size = 3) +
  scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
                     na.value = "grey60") +
  labs(title = "UMAP BAC", color = NULL) + theme_classic()

6.4 UMAP REL

umap_plot_REL %>% select(dataset, V1, V2, Tipo_label) %>% distinct() %>%
  ggplot(aes(V1, V2, color = Tipo_label)) + geom_point(size = 3) +
  scale_color_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
                     na.value = "grey60") +
  labs(title = "UMAP REL", color = NULL) + theme_classic()

7 Adonis (PERMANOVA)

7.1 Adonis Global

tmp <- rownames_to_column(Guayana_spread, "dataset") %>%
  left_join(Tabla_inicial %>% select(dataset, Tipo_label, batch) %>% distinct(), by = "dataset") %>%
  column_to_rownames("dataset")
adonis2(select_if(tmp, is.numeric) ~ Tipo_label, data = tmp, perm = 999, na.action = na.omit)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = select_if(tmp, is.numeric) ~ Tipo_label, data = tmp, permutations = 999, na.action = na.omit)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.6012 0.02149 1.9547  0.003 **
## Residual 89  27.3723 0.97851                 
## Total    90  27.9735 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.2 Adonis por lote

adonis2(select_if(tmp, is.numeric) ~ batch, data = tmp, perm = 999, na.action = na.omit)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = select_if(tmp, is.numeric) ~ batch, data = tmp, permutations = 999, na.action = na.omit)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     3   1.5228 0.05444 1.6696  0.002 **
## Residual 87  26.4507 0.94556                 
## Total    90  27.9735 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.3 Adonis ABR

tmp_abr <- rownames_to_column(Guayana_spread_ABR, "dataset") %>%
  left_join(Tabla_inicial %>% select(dataset, Tipo_label, batch) %>% distinct(), by = "dataset") %>%
  column_to_rownames("dataset")
adonis2(select_if(tmp_abr, is.numeric) ~ Tipo_label, data = tmp_abr, perm = 999, na.action = na.omit)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = select_if(tmp_abr, is.numeric) ~ Tipo_label, data = tmp_abr, permutations = 999, na.action = na.omit)
##          Df SumOfSqs      R2     F Pr(>F)   
## Model     1   0.4679 0.02495 2.277  0.009 **
## Residual 89  18.2893 0.97505                
## Total    90  18.7573 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.4 Adonis BAC

tmp_bac <- rownames_to_column(Guayana_spread_BAC, "dataset") %>%
  left_join(Tabla_inicial %>% select(dataset, Tipo_label, batch) %>% distinct(), by = "dataset") %>%
  column_to_rownames("dataset")
adonis2(select_if(tmp_bac, is.numeric) ~ Tipo_label, data = tmp_bac, perm = 999, na.action = na.omit)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = select_if(tmp_bac, is.numeric) ~ Tipo_label, data = tmp_bac, permutations = 999, na.action = na.omit)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1    0.745 0.01934 1.7553  0.005 **
## Residual 89   37.789 0.98066                 
## Total    90   38.535 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.5 Adonis REL

tmp_rel <- rownames_to_column(Guayana_spread_REL, "dataset") %>%
  left_join(Tabla_inicial %>% select(dataset, Tipo_label, batch) %>% distinct(), by = "dataset") %>%
  column_to_rownames("dataset")
adonis2(select_if(tmp_rel, is.numeric) ~ Tipo_label, data = tmp_rel, perm = 999, na.action = na.omit)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = select_if(tmp_rel, is.numeric) ~ Tipo_label, data = tmp_rel, permutations = 999, na.action = na.omit)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   0.7388 0.03118 2.8646  0.001 ***
## Residual 89  22.9544 0.96882                  
## Total    90  23.6932 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

8 Composición del resistoma

8.1 Top ARGs por prevalencia

top_args <- Tabla_inicial_info %>%
  filter(database == "ABR") %>%
  group_by(template, Class) %>%
  summarise(n_muestras = n_distinct(dataset),
            mean_depth = round(mean(depth), 2), .groups = "drop") %>%
  arrange(desc(n_muestras)) %>%
  slice_head(n = 40)

datatable(top_args, rownames = FALSE, filter = "top",
          caption = "Top 40 ARGs por prevalencia",
          options = list(pageLength = 15, scrollX = TRUE))

8.2 Prevalencia por clase de antibiótico

Tabla_inicial_info %>%
  filter(database == "ABR", !is.na(Class)) %>%
  left_join(metadata_nobatch, by = "dataset") %>%
  filter(!is.na(Tipo_label)) %>%
  group_by(Class, Tipo_label) %>%
  summarise(n = n_distinct(dataset), .groups = "drop") %>%
  mutate(pct = n / n_distinct(Tabla_inicial$dataset) * 100) %>%
  ggplot(aes(x = reorder(Class, -pct), y = pct, fill = Tipo_label)) +
  geom_col(position = position_dodge(0.8), width = 0.7) +
  scale_fill_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35")) +
  labs(title = "Prevalencia por clase de antibiótico", x = NULL, y = "% muestras", fill = NULL) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 50, hjust = 1, size = 9))

8.3 Heatmap ARGs top 40

top40_genes <- top_args$template[1:min(40, nrow(top_args))]
heat_data   <- Guayana_spread_ABR[, colnames(Guayana_spread_ABR) %in% top40_genes, drop = FALSE]

if (ncol(heat_data) > 1) {
  heat_pa  <- (heat_data > 0) * 1
  ann_col  <- metadata %>%
    filter(dataset %in% rownames(heat_pa)) %>%
    select(dataset, batch, Tipo_label) %>%
    column_to_rownames("dataset")

  pheatmap(
    t(heat_pa[rownames(ann_col), , drop = FALSE]),
    annotation_col  = ann_col,
    show_colnames   = FALSE,
    color           = c("white", "#1565C0"),
    main            = "Presencia/ausencia top 40 ARGs",
    fontsize_row    = 7,
    annotation_colors = list(
      batch      = setNames(RColorBrewer::brewer.pal(4,"Set1"), c("LIB1","LIB6","RUN6","RUN7")),
      Tipo_label = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35")
    )
  )
}

8.4 Top compuestos BacMet

bac_prev <- Tabla_inicial_info %>%
  filter(database == "BAC") %>%
  left_join(metadata_nobatch, by = "dataset") %>%
  filter(!is.na(Compound)) %>%
  group_by(Compound, Class_compound, Tipo_label) %>%
  summarise(n = n_distinct(dataset), .groups = "drop")

if (nrow(bac_prev) > 0) {
  bac_prev %>%
    group_by(Compound) %>% mutate(total = sum(n)) %>% ungroup() %>%
    filter(dense_rank(desc(total)) <= 20) %>%
    ggplot(aes(x = reorder(Compound, -total), y = n, fill = Tipo_label)) +
    geom_col(position = position_dodge(0.8)) +
    scale_fill_manual(values = c("Entrantes (T0)"="#4DBBD5","Estancia (T1)"="#E64B35"),
                      na.value = "gray60") +
    labs(title = "Top 20 compuestos BacMet", x = NULL, y = "N muestras", fill = NULL) +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 50, hjust = 1, size = 8))
} else {
  cat("Sin hits BAC.\n")
}

8.5 Relaxasas

rel_summary <- Tabla_inicial_info %>%
  filter(database == "REL") %>%
  left_join(metadata_nobatch, by = "dataset") %>%
  group_by(template_gene) %>%
  summarise(n_muestras = n_distinct(dataset),
            batches    = paste(sort(unique(batch)), collapse = ", "),
            .groups    = "drop") %>%
  arrange(desc(n_muestras))

if (nrow(rel_summary) > 0) {
  datatable(rel_summary, rownames = FALSE,
            caption = "Genes de movilización (REL) detectados")
} else {
  cat("Sin hits REL.\n")
}

9 Tabla resumen

Tabla_inicial_info %>%
  filter(database == "ABR") %>%
  left_join(metadata_nobatch, by = "dataset") %>%
  group_by(dataset, batch, pool_id, Tipo_label) %>%
  summarise(n_ARGs    = n_distinct(template),
            n_clases  = n_distinct(Class, na.rm = TRUE),
            mean_depth = round(mean(depth), 2),
            .groups   = "drop") %>%
  arrange(batch, pool_id, Tipo_label) %>%
  datatable(rownames = FALSE, filter = "top",
            caption  = "Resumen ARGs por muestra",
            options  = list(pageLength = 15, scrollX = TRUE))

10 Resistoma endémico de la UCI

10.1 Core T1 vs Core T0

bind_rows(
  arg_prev_T1 %>% mutate(grupo = "Estancia T1"),
  arg_prev_T0 %>% rename(prev_pct = prev_pct_T0) %>%
    mutate(resistome_class = case_when(
      prev_pct >= 90 ~ "Core", prev_pct >= 15 ~ "Shell", TRUE ~ "Rare"
    ), grupo = "Entrantes T0")
) %>%
  count(grupo, resistome_class) %>%
  mutate(resistome_class = factor(resistome_class, levels = c("Core","Shell","Rare"))) %>%
  ggplot(aes(x = resistome_class, y = n, fill = grupo)) +
  geom_col(position = position_dodge(0.7), width = 0.6) +
  geom_text(aes(label = n), position = position_dodge(0.7), vjust = -0.3, size = 3.5) +
  scale_fill_manual(values = c("Estancia T1" = "#E64B35", "Entrantes T0" = "#4DBBD5")) +
  labs(title = "Clasificación del resistoma: T1 (UCI) vs T0 (entrantes)",
       x = NULL, y = "N genes", fill = NULL) +
  theme_classic() + theme(legend.position = "top")

10.2 Genes endémicos de la UCI

cat("Genes Core T1:", length(core_T1_genes), "\n")
## Genes Core T1: 22
cat("Genes Core T0:", length(core_T0_genes), "\n")
## Genes Core T0: 28
cat("Genes endémicos UCI (Core T1, NO Core T0):", length(endemic_genes), "\n\n")
## Genes endémicos UCI (Core T1, NO Core T0): 1
arg_prev_T1 %>%
  filter(template %in% endemic_genes) %>%
  left_join(arg_prev_T0, by = "template") %>%
  arrange(desc(prev_pct)) %>%
  mutate(across(where(is.numeric), ~round(., 1))) %>%
  datatable(rownames = FALSE, filter = "top",
            caption = "Genes endémicos UCI: Core en T1, no Core en T0",
            options = list(pageLength = 15, scrollX = TRUE))

10.3 Estabilidad temporal de T1

# Bray-Curtis entre semanas consecutivas de T1 (orden global por week_abs)
meta_T1_ord <- metadata %>%
  filter(Tipo == "1") %>%
  arrange(week_abs) %>%
  mutate(semana_rank = row_number())

# Matriz de distancias T1
dist_T1 <- vegdist(spread_T1_abr[meta_T1_ord$dataset[meta_T1_ord$dataset %in% rownames(spread_T1_abr)], ], method = "bray")

# Comparar distancia entre semanas consecutivas vs no consecutivas
dist_T1_df <- as.data.frame(as.matrix(dist_T1)) %>%
  rownames_to_column("ds1") %>%
  pivot_longer(-ds1, names_to = "ds2", values_to = "bray") %>%
  filter(ds1 < ds2) %>%
  left_join(meta_T1_ord %>% select(dataset, semana_rank, week_abs), by = c("ds1" = "dataset")) %>%
  left_join(meta_T1_ord %>% select(dataset, semana_rank, week_abs), by = c("ds2" = "dataset"),
            suffix = c("_1","_2")) %>%
  mutate(consecutivas = abs(semana_rank_1 - semana_rank_2) == 1,
         tipo_par = ifelse(consecutivas, "Semanas consecutivas", "Semanas no consecutivas"))

ggplot(dist_T1_df, aes(x = tipo_par, y = bray, fill = tipo_par)) +
  geom_boxplot(outlier.size = 1) +
  geom_jitter(width = 0.1, alpha = 0.4, size = 1) +
  scale_fill_manual(values = c("Semanas consecutivas" = "#4CAF50",
                               "Semanas no consecutivas" = "#9E9E9E")) +
  stat_compare_means(method = "wilcox.test", label.x.npc = 0.35) +
  labs(title = "Estabilidad T1: distancia Bray-Curtis entre semanas",
       subtitle = "Distancia baja = resistoma estable",
       x = NULL, y = "Bray-Curtis", fill = NULL) +
  theme_classic() + theme(legend.position = "none")

11 Influencia T0 → T1 (misma semana)

11.1 Similitud T0–T1 misma semana vs aleatoria

bind_rows(
  same_week_dist %>% select(bray = bray_T0_T1) %>% mutate(tipo = "Misma semana"),
  random_pairs %>% select(bray) %>% mutate(tipo = "Par aleatorio")
) %>%
  ggplot(aes(x = tipo, y = bray, fill = tipo)) +
  geom_boxplot(outlier.size = 1) +
  geom_jitter(width = 0.1, alpha = 0.5, size = 1.5) +
  scale_fill_manual(values = c("Misma semana" = "#9C27B0", "Par aleatorio" = "#9E9E9E")) +
  stat_compare_means(method = "wilcox.test", label.x.npc = 0.35) +
  labs(title = "¿Se parece T0 al T1 de la misma semana?",
       subtitle = "Distancia baja = T0 similar a T1 (influencia de entrantes)",
       x = NULL, y = "Bray-Curtis", fill = NULL) +
  theme_classic() + theme(legend.position = "none")

11.2 Procrustes T0 ↔︎ T1 (misma semana)

# Solo pools con par completo
common_pools_T0 <- pairs_same$ds_0[pairs_same$ds_0 %in% rownames(spread_T0_abr)]
common_pools_T1 <- pairs_same$ds_1[pairs_same$ds_1 %in% rownames(spread_T1_abr)]

if (length(common_pools_T0) >= 5) {
  mat_T0 <- spread_T0_abr[common_pools_T0, , drop = FALSE]
  mat_T1 <- spread_T1_abr[common_pools_T1, , drop = FALSE]

  # Asegurar mismas columnas
  common_cols <- intersect(colnames(mat_T0), colnames(mat_T1))
  if (length(common_cols) == 0) {
    all_cols <- union(colnames(mat_T0), colnames(mat_T1))
    mat_T0 <- mat_T0[, all_cols[all_cols %in% colnames(mat_T0)], drop = FALSE]
    mat_T1 <- mat_T1[, all_cols[all_cols %in% colnames(mat_T1)], drop = FALSE]
  }

  pca_T0_p <- prcomp(decostand(mat_T0, "hellinger"))
  pca_T1_p <- prcomp(decostand(mat_T1, "hellinger"))

  prot <- protest(pca_T0_p$x, pca_T1_p$x, permutations = 999)
  cat("Procrustes T0 ↔ T1 misma semana\n")
  cat("m² =", round(prot$ss, 4), " | p =", prot$signif, "\n")
  cat("Correlación =", round(sqrt(1 - prot$ss), 4), "\n\n")

  proc_df <- data.frame(
    T1_x = prot$Yrot[,1], T1_y = prot$Yrot[,2],
    T0_x = prot$X[,1],    T0_y = prot$X[,2],
    ds_T1 = rownames(prot$Yrot)
  ) %>%
    left_join(pairs_same %>% select(ds_0, ds_1, batch_T0), by = c("ds_T1" = "ds_1"))

  ggplot(proc_df) +
    geom_segment(aes(x = T0_x, y = T0_y, xend = T1_x, yend = T1_y, color = batch_T0),
                 arrow = arrow(length = unit(0.2,"cm")), alpha = 0.7) +
    geom_point(aes(T0_x, T0_y, color = batch_T0), shape = 1, size = 3) +
    geom_point(aes(T1_x, T1_y, color = batch_T0), shape = 16, size = 3) +
    scale_color_brewer(palette = "Set1") +
    annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5, size = 3.5,
             label = paste0("m² = ", round(prot$ss, 3), "  p = ", prot$signif)) +
    labs(title = "Procrustes: T0 (círculo) → T1 (punto) misma semana",
         subtitle = "Flechas cortas = alta similitud T0–T1",
         x = "PC1", y = "PC2", color = "Lote") +
    theme_classic()
} else {
  cat("Insuficientes pares T0-T1 para Procrustes.\n")
}
## Procrustes T0 ↔ T1 misma semana
## m² = 0.4442  | p = 0.882 
## Correlación = 0.7455

11.3 Overlap génico por semana

overlap_stats <- pairs_same %>%
  pmap_dfr(function(pool_id, ds_0, ds_1, batch_T0, ...) {
    g0 <- Tabla_inicial %>% filter(dataset == ds_0, database == "ABR") %>% pull(template)
    g1 <- Tabla_inicial %>% filter(dataset == ds_1, database == "ABR") %>% pull(template)
    tibble(
      batch       = batch_T0,
      pool_id     = pool_id,
      n_T0        = length(g0),
      n_T1        = length(g1),
      n_shared    = length(intersect(g0, g1)),
      n_only_T0   = length(setdiff(g0, g1)),
      n_only_T1   = length(setdiff(g1, g0)),
      jaccard     = length(intersect(g0,g1)) / length(union(g0,g1))
    )
  })

overlap_stats %>%
  select(batch, pool_id, n_T0, n_T1, n_shared, n_only_T0, n_only_T1, jaccard) %>%
  mutate(jaccard = round(jaccard, 3)) %>%
  datatable(rownames = FALSE, filter = "top",
            caption = "Overlap génico T0-T1 por semana/pool",
            options = list(pageLength = 15, scrollX = TRUE))
overlap_stats %>%
  pivot_longer(cols = c(n_shared, n_only_T0, n_only_T1),
               names_to = "categoria", values_to = "n") %>%
  mutate(categoria = factor(categoria,
    levels = c("n_only_T0","n_shared","n_only_T1"),
    labels = c("Solo T0","Compartido","Solo T1"))) %>%
  ggplot(aes(x = pool_id, y = n, fill = categoria)) +
  geom_col(position = "stack") +
  facet_wrap(~batch, scales = "free_x") +
  scale_fill_manual(values = c("Solo T0"="#4DBBD5","Compartido"="#91D1C2",
                               "Solo T1"="#E64B35")) +
  labs(title = "Overlap de ARGs T0–T1 por semana", x = "Pool/semana",
       y = "N genes", fill = NULL) +
  theme_classic() +
  theme(strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 7))

12 Efecto retardado T0(W) → T1(W+1)

12.1 Similitud retardada vs aleatoria

bind_rows(
  lagged_pairs %>% select(bray = bray_lagged) %>% mutate(tipo = "T0(W) → T1(W+1)"),
  same_week_dist %>% select(bray = bray_T0_T1) %>% mutate(tipo = "T0 → T1 misma semana"),
  random_pairs %>% select(bray) %>% mutate(tipo = "Par aleatorio")
) %>%
  mutate(tipo = factor(tipo, levels = c("T0(W) → T1(W+1)",
                                        "T0 → T1 misma semana",
                                        "Par aleatorio"))) %>%
  ggplot(aes(x = tipo, y = bray, fill = tipo)) +
  geom_boxplot(outlier.size = 1) +
  geom_jitter(width = 0.1, alpha = 0.5, size = 1.5) +
  scale_fill_manual(values = c("T0(W) → T1(W+1)"    = "#FF9800",
                               "T0 → T1 misma semana" = "#9C27B0",
                               "Par aleatorio"        = "#9E9E9E")) +
  stat_compare_means(comparisons = list(c("T0(W) → T1(W+1)","Par aleatorio"),
                                        c("T0(W) → T1(W+1)","T0 → T1 misma semana")),
                     method = "wilcox.test") +
  labs(title = "¿Predice T0(W) el resistoma de T1(W+1)?",
       x = NULL, y = "Bray-Curtis (distancia)", fill = NULL) +
  theme_classic() + theme(legend.position = "none",
                          axis.text.x = element_text(angle = 20, hjust = 1))

12.2 Procrustes retardado T0(W) → T1(W+1)

lag_T0 <- lagged_pairs$dataset_T0[lagged_pairs$dataset_T0 %in% rownames(spread_T0_abr)]
lag_T1 <- lagged_pairs$dataset_T1[lagged_pairs$dataset_T1 %in% rownames(spread_T1_abr)]

if (length(lag_T0) >= 5) {
  mat_lag_T0 <- spread_T0_abr[lag_T0, , drop = FALSE]
  mat_lag_T1 <- spread_T1_abr[lag_T1, , drop = FALSE]

  pca_lag_T0 <- prcomp(decostand(mat_lag_T0, "hellinger"))
  pca_lag_T1 <- prcomp(decostand(mat_lag_T1, "hellinger"))

  prot_lag <- protest(pca_lag_T0$x, pca_lag_T1$x, permutations = 999)
  cat("Procrustes retardado T0(W) → T1(W+1)\n")
  cat("m² =", round(prot_lag$ss, 4), " | p =", prot_lag$signif, "\n")
  cat("Correlación =", round(sqrt(1 - prot_lag$ss), 4), "\n\n")

  proc_lag_df <- data.frame(
    T1_x = prot_lag$Yrot[,1], T1_y = prot_lag$Yrot[,2],
    T0_x = prot_lag$X[,1],    T0_y = prot_lag$X[,2],
    ds_T0 = rownames(prot_lag$X)
  ) %>%
    left_join(lagged_pairs %>% select(dataset_T0, batch_T0, pool_id_T0), by = c("ds_T0" = "dataset_T0"))

  ggplot(proc_lag_df) +
    geom_segment(aes(x = T0_x, y = T0_y, xend = T1_x, yend = T1_y, color = batch_T0),
                 arrow = arrow(length = unit(0.2,"cm")), alpha = 0.7) +
    geom_point(aes(T0_x, T0_y, color = batch_T0), shape = 1, size = 3) +
    geom_point(aes(T1_x, T1_y, color = batch_T0), shape = 16, size = 3) +
    scale_color_brewer(palette = "Set1") +
    annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5, size = 3.5,
             label = paste0("m² = ", round(prot_lag$ss, 3), "  p = ", prot_lag$signif)) +
    labs(title = "Procrustes: T0(W) (círculo) → T1(W+1) (punto)",
         subtitle = "Flechas cortas = T0 predice T1 siguiente semana",
         x = "PC1", y = "PC2", color = "Lote") +
    theme_classic()
} else {
  cat("Insuficientes pares retardados para Procrustes.\n")
}
## Procrustes retardado T0(W) → T1(W+1)
## m² = 0.3895  | p = 0.033 
## Correlación = 0.7813

12.3 PERMANOVA parcial: ¿cuánto explica T0(W) de T1(W+1)?

# Construir tabla de T1 de los pares retardados
if (nrow(lagged_pairs) >= 6) {
  mat_T1_lag_full <- spread_T1_abr[lagged_pairs$dataset_T1, , drop = FALSE]
  mat_T1_lag_full[is.na(mat_T1_lag_full)] <- 0

  # Vector de diversidad Shannon de T0 como covariable
  sha_T0 <- vegan::diversity(
    spread_T0_abr[lagged_pairs$dataset_T0, , drop = FALSE],
    index = "shannon"
  )

  meta_lag_perm <- lagged_pairs %>%
    mutate(shannon_T0 = sha_T0)

  dist_T1_lag <- vegdist(mat_T1_lag_full, method = "bray")

  cat("**PERMANOVA: varianza de T1(W+1) explicada por Shannon_T0(W) + batch**\n\n")
  print(adonis2(dist_T1_lag ~ shannon_T0 + batch_T0,
                data = meta_lag_perm, permutations = 999))
}

PERMANOVA: varianza de T1(W+1) explicada por Shannon_T0(W) + batch

Permutation test for adonis under reduced model Permutation: free Number of permutations: 999

adonis2(formula = dist_T1_lag ~ shannon_T0 + batch_T0, data = meta_lag_perm, permutations = 999) Df SumOfSqs R2 F Pr(>F)
Model 4 1.2111 0.13035 1.4614 0.035 * Residual 39 8.0801 0.86965
Total 43 9.2912 1.00000
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

12.4 Correlación Shannon T0(W) → T1(W+1)

sha_T0_df <- tibble(
  dataset_T0   = rownames(spread_T0_abr),
  shannon_T0   = vegan::diversity(spread_T0_abr, "shannon")
)
sha_T1_df <- tibble(
  dataset_T1   = rownames(spread_T1_abr),
  shannon_T1   = vegan::diversity(spread_T1_abr, "shannon")
)

corr_df <- lagged_pairs %>%
  left_join(sha_T0_df, by = "dataset_T0") %>%
  left_join(sha_T1_df, by = "dataset_T1")

ggplot(corr_df, aes(x = shannon_T0, y = shannon_T1, color = batch_T0)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", color = "black", linetype = "dashed", se = TRUE) +
  stat_cor(method = "spearman", label.x.npc = 0.05, label.y.npc = 0.95) +
  scale_color_brewer(palette = "Set1") +
  labs(title = "¿Predice la diversidad de entrantes (T0_W) la diversidad de ingresados (T1_{W+1})?",
       x = "Shannon T0 (semana W)", y = "Shannon T1 (semana W+1)", color = "Lote") +
  theme_classic()

12.5 Genes T0(W) que persisten en T1(W+1)

persistence <- lagged_pairs %>%
  pmap_dfr(function(pool_id_T0, pool_id_T1, dataset_T0, dataset_T1, batch_T0, ...) {
    g0 <- Tabla_inicial %>% filter(dataset == dataset_T0, database == "ABR") %>% pull(template)
    g1 <- Tabla_inicial %>% filter(dataset == dataset_T1, database == "ABR") %>% pull(template)
    tibble(
      batch        = batch_T0,
      pool_T0      = pool_id_T0,
      pool_T1      = pool_id_T1,
      n_T0         = length(g0),
      n_T1         = length(g1),
      n_persist    = length(intersect(g0, g1)),
      pct_T0_en_T1 = ifelse(length(g0) > 0, length(intersect(g0,g1)) / length(g0) * 100, NA),
      pct_T1_de_T0 = ifelse(length(g1) > 0, length(intersect(g0,g1)) / length(g1) * 100, NA)
    )
  })

persistence %>%
  pivot_longer(cols = c(pct_T0_en_T1, pct_T1_de_T0), names_to = "metrica", values_to = "pct") %>%
  mutate(metrica = recode(metrica,
    pct_T0_en_T1 = "% genes T0(W) encontrados en T1(W+1)",
    pct_T1_de_T0 = "% genes T1(W+1) que vienen de T0(W)")) %>%
  ggplot(aes(x = batch, y = pct, fill = metrica)) +
  geom_boxplot(position = position_dodge(0.8)) +
  scale_fill_manual(values = c("#4DBBD5","#E64B35")) +
  labs(title = "Persistencia génica T0(W) → T1(W+1)",
       x = NULL, y = "% genes", fill = NULL) +
  theme_classic() +
  theme(legend.position = "top",
        axis.text.x = element_text(angle = 20, hjust = 1))

13 ¿Se puede predecir el T1 desde T0?

13.1 Genes con mayor poder predictivo

gene_pred %>%
  filter(p_adj < 0.05) %>%
  mutate(across(where(is.numeric), ~round(., 3))) %>%
  datatable(rownames = FALSE, filter = "top",
            caption = "ARGs cuya presencia en T0(W) predice su presencia en T1(W+1) (FDR < 0.05)",
            options = list(pageLength = 15, scrollX = TRUE))

13.2 Volcano plot de predicción

gene_pred %>%
  filter(!is.na(spearman_r), !is.na(p_adj)) %>%
  mutate(
    sig       = p_adj < 0.05 & spearman_r > 0.3,
    label_gen = ifelse(sig & rank(p_adj) <= 15, template, NA)
  ) %>%
  ggplot(aes(x = spearman_r, y = -log10(p_adj + 1e-10),
             color = sig, label = label_gen)) +
  geom_point(alpha = 0.7, size = 2) +
  geom_text_repel(size = 2.5, max.overlaps = 20) +
  geom_vline(xintercept = 0.3, linetype = "dashed", color = "gray50") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "gray50") +
  scale_color_manual(values = c("FALSE" = "gray70", "TRUE" = "#E64B35")) +
  labs(title = "Poder predictivo gen a gen: T0(W) → T1(W+1)",
       x = "Spearman r (correlación T0 → T1)", y = "-log10(FDR)",
       caption = "Umbral: r > 0.3, FDR < 0.05") +
  theme_classic() + theme(legend.position = "none")

13.3 Predictividad por clase de antibiótico

gene_pred %>%
  filter(!is.na(Class)) %>%
  group_by(Class) %>%
  summarise(
    n_genes       = n(),
    n_predictivos = sum(p_adj < 0.05 & spearman_r > 0.3, na.rm = TRUE),
    pct_pred      = n_predictivos / n_genes * 100,
    mean_r        = mean(spearman_r, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(n_genes >= 3) %>%
  arrange(desc(pct_pred)) %>%
  ggplot(aes(x = reorder(Class, pct_pred), y = pct_pred, fill = mean_r)) +
  geom_col() + coord_flip() +
  scale_fill_gradient2(low = "#4DBBD5", mid = "white", high = "#E64B35", midpoint = 0) +
  labs(title = "% genes predictivos por clase (T0 → T1 siguiente semana)",
       x = NULL, y = "% genes con r > 0.3 y FDR < 0.05", fill = "Mean r") +
  theme_classic()

13.4 Resumen preventivo: clases de mayor riesgo

gene_pred %>%
  filter(p_adj < 0.05, spearman_r > 0.3) %>%
  group_by(Class) %>%
  summarise(
    n_genes_predictivos = n(),
    top_gen             = template[which.max(spearman_r)],
    max_r               = round(max(spearman_r), 3),
    mean_prev_T0        = round(mean(prev_T0), 3),
    mean_prev_T1        = round(mean(prev_T1), 3),
    .groups = "drop"
  ) %>%
  arrange(desc(n_genes_predictivos)) %>%
  datatable(rownames = FALSE,
            caption  = "Clases con genes predictivos: monitorizar en T0 para anticipar T1",
            options  = list(pageLength = 15, scrollX = TRUE))

14 Evolución temporal del resistoma

14.1 Diversidad Shannon a lo largo del tiempo

temporal_div %>%
  mutate(Tipo_label = ifelse(Tipo == "0", "Entrantes (T0)", "Estancia (T1)")) %>%
  ggplot(aes(x = week_abs, y = shannon, color = Tipo_label)) +
  geom_point(alpha = 0.7, size = 2) +
  geom_smooth(method = "loess", span = 0.4, se = TRUE, alpha = 0.15) +
  geom_vline(xintercept = c(
    metadata %>% filter(periodo == "P1_Primavera-Verano2019") %>% pull(week_abs) %>% max(),
    metadata %>% filter(periodo == "P2_Otoño2019") %>% pull(week_abs) %>% max()
  ), linetype = "dashed", color = "grey50", alpha = 0.6) +
  scale_color_manual(values = c("Entrantes (T0)" = "#4DBBD5",
                                "Estancia (T1)"  = "#E64B35")) +
  labs(title  = "Evolución temporal de la diversidad ARG",
       subtitle = "Líneas punteadas = cambio de período (P1→P2→P3)",
       x = "Semana absoluta (week_abs)", y = "Shannon H'", color = NULL) +
  theme_classic() + theme(legend.position = "top")

14.2 Riqueza génica por semana

temporal_div %>%
  mutate(Tipo_label = ifelse(Tipo == "0", "Entrantes (T0)", "Estancia (T1)")) %>%
  ggplot(aes(x = week_abs, y = n_genes, color = Tipo_label)) +
  geom_line(alpha = 0.6) +
  geom_point(size = 2, alpha = 0.8) +
  geom_smooth(method = "loess", span = 0.4, se = FALSE, linetype = "dashed") +
  scale_color_manual(values = c("Entrantes (T0)" = "#4DBBD5",
                                "Estancia (T1)"  = "#E64B35")) +
  labs(title = "Riqueza de ARGs por semana epidemiológica",
       x = "Semana absoluta", y = "N genes ARG", color = NULL) +
  theme_classic() + theme(legend.position = "top")

14.3 Genes endémicos presentes en T1 a lo largo del tiempo

if (length(endemic_genes) > 0 && nrow(core_T1_weekly) > 0) {
  core_T1_weekly %>%
    ggplot(aes(x = week_abs, y = n_endemic, fill = periodo)) +
    geom_col(alpha = 0.8) +
    scale_fill_manual(values = c("P1_Primavera-Verano2019" = "#4DBBD5",
                                 "P2_Otoño2019"            = "#E64B35",
                                 "P3_Invierno2019-2020"    = "#F39B7F")) +
    labs(title  = "Genes endémicos UCI (Core T1) presentes por semana",
         subtitle = paste0("N genes endémicos = ", length(endemic_genes)),
         x = "Semana absoluta", y = "N genes endémicos detectados",
         fill = "Período") +
    theme_classic()
} else {
  cat("No hay suficientes genes endémicos o datos para este gráfico.\n")
}

14.4 Composición por clase a lo largo del tiempo

Tabla_inicial %>%
  filter(database == "ABR", !is.na(week_abs), !is.na(Tipo)) %>%
  left_join(Resfinder_phenotype %>% select(template, Class) %>% distinct(), by = "template") %>%
  filter(!is.na(Class)) %>%
  mutate(Tipo_label = ifelse(Tipo == "0", "Entrantes (T0)", "Estancia (T1)")) %>%
  group_by(week_abs, Tipo_label, Class) %>%
  summarise(n = n_distinct(template), .groups = "drop") %>%
  group_by(week_abs, Tipo_label) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ungroup() %>%
  filter(Class %in% (Resfinder_phenotype %>% count(Class) %>% slice_max(n, n=8) %>% pull(Class))) %>%
  ggplot(aes(x = week_abs, y = pct, fill = Class)) +
  geom_area(position = "stack", alpha = 0.8) +
  facet_wrap(~Tipo_label, ncol = 1) +
  scale_fill_brewer(palette = "Set3") +
  labs(title = "Composición ARG por clase a lo largo del tiempo",
       x = "Semana absoluta", y = "% genes", fill = "Clase") +
  theme_classic() +
  theme(strip.background = element_blank(), legend.position = "bottom")

15 Vigilancia de genes críticos

15.1 Prevalencia semanal de genes críticos

if (nrow(critical_df) > 0) {
  critical_df %>%
    mutate(Tipo_label = ifelse(Tipo == "0", "Entrantes (T0)", "Estancia (T1)")) %>%
    group_by(week_abs, grupo_clinico, Tipo_label) %>%
    summarise(detected = n_distinct(template), .groups = "drop") %>%
    ggplot(aes(x = week_abs, y = detected, color = Tipo_label, group = Tipo_label)) +
    geom_line(alpha = 0.7) +
    geom_point(size = 2) +
    facet_wrap(~grupo_clinico, scales = "free_y", ncol = 2) +
    scale_color_manual(values = c("Entrantes (T0)" = "#4DBBD5",
                                  "Estancia (T1)"  = "#E64B35")) +
    labs(title  = "Genes de alto impacto clínico — presencia por semana",
         x = "Semana absoluta", y = "N genes detectados", color = NULL) +
    theme_classic() +
    theme(strip.background = element_blank(), legend.position = "top")
} else {
  cat("No se detectaron genes en las categorías clínicas definidas.\n")
}

15.2 mecA: profundidad de cobertura T0 vs T1

meca_df <- Tabla_inicial %>%
  filter(database == "ABR", grepl("mecA", template, ignore.case = TRUE),
         !is.na(week_abs)) %>%
  mutate(Tipo_label = ifelse(Tipo == "0", "Entrantes (T0)", "Estancia (T1)"))

if (nrow(meca_df) > 0) {
  p1 <- ggplot(meca_df, aes(x = week_abs, y = depth, color = Tipo_label)) +
    geom_point(size = 2, alpha = 0.8) +
    geom_smooth(method = "loess", se = FALSE, span = 0.5, linetype = "dashed") +
    scale_color_manual(values = c("Entrantes (T0)" = "#4DBBD5",
                                  "Estancia (T1)"  = "#E64B35")) +
    labs(title = "Profundidad mecA por semana",
         x = "Semana absoluta", y = "Depth (KMA)", color = NULL) +
    theme_classic()

  p2 <- ggplot(meca_df, aes(x = Tipo_label, y = depth, fill = Tipo_label)) +
    geom_boxplot(outlier.size = 1) +
    geom_jitter(width = 0.1, alpha = 0.4) +
    scale_fill_manual(values = c("Entrantes (T0)" = "#4DBBD5",
                                 "Estancia (T1)"  = "#E64B35")) +
    stat_compare_means(method = "wilcox.test") +
    labs(title = "Depth mecA: T0 vs T1", x = NULL, y = "Depth", fill = NULL) +
    theme_classic() + theme(legend.position = "none")

  cowplot::plot_grid(p1, p2, rel_widths = c(2, 1))
} else {
  cat("mecA no detectado en los datos filtrados.\n")
}

15.3 Tabla genes críticos detectados

if (nrow(critical_df) > 0) {
  critical_df %>%
    group_by(grupo_clinico, template) %>%
    summarise(
      n_muestras    = n_distinct(dataset),
      n_T0          = n_distinct(dataset[Tipo == "0"]),
      n_T1          = n_distinct(dataset[Tipo == "1"]),
      mean_depth    = round(mean(depth), 1),
      max_depth     = round(max(depth), 1),
      semanas_det   = n_distinct(week_abs),
      .groups = "drop"
    ) %>%
    arrange(grupo_clinico, desc(n_muestras)) %>%
    datatable(rownames = FALSE, filter = "top",
              caption = "Genes clínicamente críticos detectados",
              options = list(pageLength = 20, scrollX = TRUE))
} else {
  cat("No se detectaron genes críticos.\n")
}

16 Presión de selección intraUCI

16.1 MA-plot: amplificación vs importación

depth_comparison %>%
  filter(!is.na(log2_ratio)) %>%
  mutate(
    categoria = case_when(
      amplificado_UCI ~ "Amplificado UCI",
      importado       ~ "Importado (T0)",
      TRUE            ~ "Neutro"
    ),
    label = ifelse(abs(log2_ratio) > 2 & (n_det_T0 + n_det_T1) > 10, template, NA)
  ) %>%
  ggplot(aes(
    x = log2((median_depth_T0 + median_depth_T1) / 2),
    y = log2_ratio,
    color = categoria,
    label = label
  )) +
  geom_point(alpha = 0.7, size = 2) +
  geom_hline(yintercept = c(-1, 1), linetype = "dashed", color = "grey50") +
  geom_hline(yintercept = 0, color = "black", alpha = 0.5) +
  geom_text_repel(size = 2.5, max.overlaps = 15, show.legend = FALSE) +
  scale_color_manual(values = c(
    "Amplificado UCI" = "#E64B35",
    "Importado (T0)"  = "#4DBBD5",
    "Neutro"          = "#B0B0B0"
  )) +
  labs(
    title    = "Presión de selección intraUCI: depth T1 vs T0",
    subtitle = "Arriba = amplificado en UCI | Abajo = más abundante en entrantes",
    x        = "log2(depth promedio)",
    y        = "log2(depth T1 / depth T0)",
    color    = NULL
  ) +
  theme_classic() + theme(legend.position = "top")

16.2 Distribución de ratios por clase de antibiótico

depth_comparison %>%
  filter(!is.na(Class), !is.na(log2_ratio)) %>%
  group_by(Class) %>%
  filter(n() >= 3) %>%
  ungroup() %>%
  mutate(Class = reorder(Class, log2_ratio, FUN = median)) %>%
  ggplot(aes(x = Class, y = log2_ratio, fill = median(log2_ratio) > 0)) +
  geom_boxplot(outlier.size = 1, show.legend = FALSE) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
  coord_flip() +
  scale_fill_manual(values = c("TRUE" = "#E64B35", "FALSE" = "#4DBBD5")) +
  labs(
    title = "Ratio de amplificación UCI por clase de antibiótico",
    subtitle = "Rojo = más abundante en T1 (UCI amplifica) | Azul = más en T0 (importado)",
    x = NULL, y = "log2(depth T1 / depth T0)"
  ) +
  theme_classic()

16.3 Tabla genes amplificados en UCI

depth_comparison %>%
  filter(amplificado_UCI | importado) %>%
  mutate(
    categoria = ifelse(amplificado_UCI, "Amplificado UCI", "Importado (T0)"),
    across(where(is.numeric), ~round(., 2))
  ) %>%
  select(template, Class, categoria, median_depth_T0, median_depth_T1,
         log2_ratio, n_det_T0, n_det_T1) %>%
  arrange(desc(abs(log2_ratio))) %>%
  datatable(rownames = FALSE, filter = "top",
            caption  = "Genes con presión de selección diferencial T0 vs T1",
            options  = list(pageLength = 20, scrollX = TRUE))

17 Clasificación origen de genes T1

17.1 Distribución de orígenes

gene_origin %>%
  count(origen) %>%
  mutate(origen = reorder(origen, n)) %>%
  ggplot(aes(x = origen, y = n, fill = origen)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), hjust = -0.2, size = 4) +
  coord_flip() +
  scale_fill_manual(values = c(
    "Endémico UCI"        = "#E64B35",
    "Amplificado intraUCI"= "#F39B7F",
    "Importado"           = "#4DBBD5",
    "Fugaz (solo T0)"     = "#91D1C2",
    "Transitorio"         = "#B0B0B0"
  )) +
  labs(title    = "Origen de los genes ARG detectados en la UCI",
       subtitle = "Basado en prevalencia relativa en T0 (entrantes) vs T1 (estancia UCI)",
       x = NULL, y = "N genes") +
  theme_classic() +
  expand_limits(y = max(table(gene_origin$origen)) * 1.15)

17.2 Scatter prevalencia T0 vs T1

gene_origin %>%
  filter(prev_T0 > 0 | prev_T1 > 0) %>%
  mutate(label = ifelse(
    (prev_T1 > 60 | prev_T0 > 60 | abs(prev_T1 - prev_T0) > 40),
    template, NA
  )) %>%
  ggplot(aes(x = prev_T0, y = prev_T1, color = origen, label = label)) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey50") +
  geom_point(alpha = 0.7, size = 2.5) +
  geom_text_repel(size = 2.5, max.overlaps = 15, show.legend = FALSE) +
  scale_color_manual(values = c(
    "Endémico UCI"        = "#E64B35",
    "Amplificado intraUCI"= "#F39B7F",
    "Importado"           = "#4DBBD5",
    "Fugaz (solo T0)"     = "#91D1C2",
    "Transitorio"         = "#B0B0B0"
  )) +
  labs(
    title    = "Prevalencia T0 vs T1 por gen",
    subtitle = "Línea diagonal = igual prevalencia | Sobre la línea = amplificado en UCI",
    x = "Prevalencia en T0 (%)", y = "Prevalencia en T1 (%)", color = "Origen"
  ) +
  theme_classic() + theme(legend.position = "right")

17.3 Genes endémicos y amplificados por clase

gene_origin %>%
  filter(origen %in% c("Endémico UCI", "Amplificado intraUCI"), !is.na(Class)) %>%
  count(origen, Class) %>%
  ggplot(aes(x = reorder(Class, n), y = n, fill = origen)) +
  geom_col(position = "dodge") +
  coord_flip() +
  scale_fill_manual(values = c("Endémico UCI" = "#E64B35",
                               "Amplificado intraUCI" = "#F39B7F")) +
  labs(title = "Clases de antibiótico: genes endémicos y amplificados",
       x = NULL, y = "N genes", fill = NULL) +
  theme_classic() + theme(legend.position = "top")

17.4 Tabla completa de clasificación

gene_origin %>%
  filter(prev_T0 > 0 | prev_T1 > 0) %>%
  mutate(across(where(is.numeric), ~round(., 1))) %>%
  arrange(origen, desc(prev_T1)) %>%
  datatable(rownames = FALSE, filter = "top",
            caption  = "Clasificación de origen de todos los ARGs detectados",
            options  = list(pageLength = 20, scrollX = TRUE))

18 Información de sesión

sessionInfo()
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.3
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=es_ES.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_ES.UTF-8        LC_COLLATE=es_ES.UTF-8    
##  [5] LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=es_ES.UTF-8   
##  [7] LC_PAPER=es_ES.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Madrid
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggstatsplot_0.13.4 ggrepel_0.9.6      broom_1.0.11       microbiome_1.30.0 
##  [5] ggpubr_0.6.2       reshape2_1.4.5     microbial_0.0.22   phyloseq_1.52.0   
##  [9] ggsci_4.2.0        viridis_0.6.5      viridisLite_0.4.2  DT_0.34.0         
## [13] plotly_4.11.0      lubridate_1.9.4    forcats_1.0.1      stringr_1.6.0     
## [17] dplyr_1.1.4        purrr_1.2.1        readr_2.1.6        tidyr_1.3.2       
## [21] tibble_3.3.1       tidyverse_2.0.0    pheatmap_1.0.13    umap_0.2.10.0     
## [25] factoextra_1.0.7   ggplot2_4.0.1      vegan_2.7-2        permute_0.9-8     
## [29] MASS_7.3-65       
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.5.3               prismatic_1.1.2            
##   [3] datawizard_1.3.0            lifecycle_1.0.5            
##   [5] rstatix_0.7.3               edgeR_4.6.3                
##   [7] lattice_0.22-7              vroom_1.6.7                
##   [9] crosstalk_1.2.2             insight_1.4.4              
##  [11] backports_1.5.0             magrittr_2.0.4             
##  [13] limma_3.64.3                sass_0.4.10                
##  [15] rmarkdown_2.30              jquerylib_0.1.4            
##  [17] yaml_2.3.12                 otel_0.2.0                 
##  [19] askpass_1.2.1               reticulate_1.44.1          
##  [21] cowplot_1.2.0               RColorBrewer_1.1-3         
##  [23] ade4_1.7-23                 abind_1.4-8                
##  [25] Rtsne_0.17                  quadprog_1.5-8             
##  [27] GenomicRanges_1.60.0        BiocGenerics_0.54.1        
##  [29] GenomeInfoDbData_1.2.14     IRanges_2.42.0             
##  [31] S4Vectors_0.46.0            correlation_0.8.8          
##  [33] RSpectra_0.16-2             codetools_0.2-20           
##  [35] DelayedArray_0.34.1         tidyselect_1.2.1           
##  [37] UCSC.utils_1.4.0            farver_2.1.2               
##  [39] effectsize_1.0.1            matrixStats_1.5.0          
##  [41] stats4_4.5.3                jsonlite_2.0.0             
##  [43] multtest_2.64.0             Formula_1.2-5              
##  [45] survival_3.8-6              iterators_1.0.14           
##  [47] emmeans_2.0.1               foreach_1.5.2              
##  [49] tools_4.5.3                 Rcpp_1.1.1                 
##  [51] glue_1.8.0                  gridExtra_2.3              
##  [53] SparseArray_1.8.1           xfun_0.55                  
##  [55] mgcv_1.9-4                  DESeq2_1.48.2              
##  [57] MatrixGenerics_1.20.0       GenomeInfoDb_1.44.3        
##  [59] withr_3.0.2                 fastmap_1.2.0              
##  [61] boot_1.3-32                 rhdf5filters_1.20.0        
##  [63] openssl_2.3.4               digest_0.6.39              
##  [65] timechange_0.3.0            R6_2.6.1                   
##  [67] estimability_1.5.1          generics_0.1.4             
##  [69] data.table_1.18.0           httr_1.4.7                 
##  [71] htmlwidgets_1.6.4           S4Arrays_1.8.1             
##  [73] parameters_0.28.3           pkgconfig_2.0.3            
##  [75] gtable_0.3.6                statsExpressions_1.7.2     
##  [77] S7_0.2.1                    XVector_0.48.0             
##  [79] htmltools_0.5.9             carData_3.0-5              
##  [81] biomformat_1.36.0           scales_1.4.0               
##  [83] Biobase_2.68.0              png_0.1-8                  
##  [85] knitr_1.51                  rstudioapi_0.17.1          
##  [87] tzdb_0.5.0                  coda_0.19-4.1              
##  [89] nlme_3.1-168                cachem_1.1.0               
##  [91] rhdf5_2.52.1                parallel_4.5.3             
##  [93] pillar_1.11.1               grid_4.5.3                 
##  [95] vctrs_0.6.5                 randomForest_4.7-1.2       
##  [97] car_3.1-3                   xtable_1.8-4               
##  [99] cluster_2.1.8.2             paletteer_1.7.0            
## [101] evaluate_1.0.5              zeallot_0.2.0              
## [103] mvtnorm_1.3-3               cli_3.6.5                  
## [105] locfit_1.5-9.12             compiler_4.5.3             
## [107] rlang_1.1.7                 crayon_1.5.3               
## [109] rstantools_2.6.0            ggsignif_0.6.4             
## [111] labeling_0.4.3              rematch2_2.1.2             
## [113] plyr_1.8.9                  stringi_1.8.7              
## [115] BiocParallel_1.42.2         Biostrings_2.76.0          
## [117] lazyeval_0.2.2              bayestestR_0.17.0          
## [119] Matrix_1.7-4                hms_1.1.4                  
## [121] patchwork_1.3.2             bit64_4.6.0-1              
## [123] Rhdf5lib_1.30.0             statmod_1.5.1              
## [125] SummarizedExperiment_1.38.1 igraph_2.2.1               
## [127] RcppParallel_5.1.11-1       bslib_0.9.0                
## [129] phangorn_2.12.1             fastmatch_1.1-6            
## [131] bit_4.6.0                   ape_5.8-1