1 Diversity index

1.1 Shapiro test

# diver_gral <- diver_gral %>%
#   pivot_longer(cols=chao_gral:dominance.simpson_REL, names_to = "cosa", values_to = "value") %>% 
#   separate(cosa, into=c("index", "database2"), sep="_")  

diver_gral <- diver_gral %>%
  pivot_longer(cols=chao_gral:dominance.simpson_BACBiocide, names_to = "cosa", values_to = "value") %>% 
  separate(cosa, into=c("index", "database2"), sep="_")

shapiro <- diver_gral %>%
  group_by(database2, index, Treatment) %>%
  do(tidy(shapiro.test(.$value)))
datatable(shapiro, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

1.2 Wilcox Test

wilcox <- diver_gral %>% 
  group_by(database2, index) %>% 
  do(tidy(wilcox.test(.$value~.$Treatment)))
datatable(wilcox, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )
student <- diver_gral %>%
  group_by(database2, index) %>%
  do(tidy(t.test(.$value~.$Treatment)))
datatable(student, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

1.3 Shannon Index

1.4 Chao index

diver_gral %>% filter(index== "chao") %>% 
  ggplot(aes(x=Treatment, y = value,fill=Treatment)) +
  geom_violin()+
  geom_boxplot(width=.2) +
  facet_wrap(.~database2)+
  ggtitle ("chao index") +xlab("Cohort origin") +ylab("chao Index")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

1.5 Dominance simpson

diver_gral %>% filter(index== "dominance.simpson") %>% 
  ggplot(aes(x=Treatment, y = value,fill=Treatment)) +
  geom_violin()+
  geom_boxplot(width=.2) +
  facet_wrap(.~database2)+
  ggtitle ("dominance simpson diversity") +xlab("Cohort origin") +ylab("simpson Index")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

1.6 Eveness simpson

diver_gral %>% filter(index== "evenness.simpson") %>% 
  ggplot(aes(x=Treatment, y = value,fill=Treatment)) +
  geom_violin()+
  geom_boxplot(width=.2) +
  facet_wrap(.~database2)+
  ggtitle ("eveness simpson diversity") +xlab("Cohort origin") +ylab("simpson Index")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

2 PCA

2.1 PCA gral by Origin

as.data.frame(PCA$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 69.08%") +ylab("PC2 14.57%")

2.2 PCA ABR by origin

as.data.frame(PCA_ABR$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 53.75%") +ylab("PC2 13.22%")

2.3 PCA BAC by origin

as.data.frame(PCA_BAC$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 93.44%") +ylab("PC2 2.19%")

2.4 PCA REL by origin

as.data.frame(PCA_REL$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 81.85%") +ylab("PC2 6.86%")

2.5 PCA BAC Metals by origin

as.data.frame(PCA_BAC_Metal$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 94.65%") +ylab("PC2 2.17%")

2.6 PCA BAC Biocide by origin

as.data.frame(PCA_BAC_Biocide$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 95.28%") +ylab("PC2 2.8%")

3 UMAP

HE5, HE152

Guayana_tsne_spearman <- as.matrix(Guayana_tsne_spearman)
Guayana_tsne_spearman_REL <- as.matrix(Guayana_tsne_spearman_REL)
Guayana_tsne_spearman_ABR <- as.matrix(Guayana_tsne_spearman_ABR)
Guayana_tsne_spearman_BAC <- as.matrix(Guayana_tsne_spearman_BAC)
Guayana_tsne_spearman_BAC_Metal = get_dist(Guayana_spread_BAC_Metal, method = "spearman")
Guayana_tsne_spearman_BAC_Biocide = get_dist(Guayana_spread_BAC_Biocide, method = "spearman")

umap_plot <- umap(Guayana_tsne_spearman, input="dist")
umap_plot_REL <- umap(Guayana_tsne_spearman_REL)
umap_plot_ABR <- umap(Guayana_tsne_spearman_ABR)
umap_plot_BAC <- umap(Guayana_tsne_spearman_BAC)
umap_plot_BAC_Metal <- umap(Guayana_spread_BAC_Metal)
umap_plot_BAC_Biocide <- umap(Guayana_spread_BAC_Biocide)


umap_plot <- as.data.frame(umap_plot$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_REL <- as.data.frame(umap_plot_REL$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_ABR <- as.data.frame(umap_plot_ABR$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC <- as.data.frame(umap_plot_BAC$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC_Metal <- as.data.frame(umap_plot_BAC_Metal$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC_Biocide <- as.data.frame(umap_plot_BAC_Biocide$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)

3.1 UMAP general by origin

3.2 UMAP ABR by origin

umap_plot_ABR %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP ABR")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

3.3 UMAP BAC by origin

umap_plot_BAC %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP BAC")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

3.4 UMAP REL by origin

umap_plot_REL %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP REL")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

3.5 UMAP BAC Metal by origin

umap_plot_BAC_Metal %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP BAC Metal")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

3.6 UMAP BAC Biocide by origin

umap_plot_BAC_Biocide %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP BAC biocide")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

4 T-SNE

4.1 T-SNE general by origin

ggplot(tsne_spearman_30_plot) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE")

4.2 T-SNE ABR by origin

ggplot(tsne_spearman_30_plot_ABR) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE ABR")

4.3 T-SNE BAC by origin

ggplot(tsne_spearman_30_plot_BAC) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE BAC")

4.4 T-SNE REL by origin

ggplot(tsne_spearman_30_plot_REL) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE REL")

5 Adonis

5.1 Adonis all

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_t[, 1:918] ~ origin, data = Guayana_spread_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1   1.7207 0.09418 12.685  0.001 ***
## Residual 122  16.5498 0.90582                  
## Total    123  18.2705 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5.2 Adonis ABR

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_ABR_t[, 1:342] ~ origin, data = Guayana_spread_ABR_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1   2.0269 0.16245 23.662  0.001 ***
## Residual 122  10.4503 0.83755                  
## Total    123  12.4771 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5.3 Adonis BAC

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_BAC_t[, 1:232] ~ origin, data = Guayana_spread_BAC_t, permutations = 999)
##           Df SumOfSqs      R2     F Pr(>F)
## origin     1    0.484 0.01395 1.726  0.115
## Residual 122   34.213 0.98605             
## Total    123   34.697 1.00000

5.4 Adonis REL

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_REL_t[, 1:344] ~ origin, data = Guayana_spread_REL_t, permutations = 999)
##           Df SumOfSqs     R2     F Pr(>F)    
## origin     1   3.7312 0.1342 18.91  0.001 ***
## Residual 122  24.0716 0.8658                 
## Total    123  27.8028 1.0000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5.5 Adonis BAC metal

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_BAC_Metal_t[, 1:144] ~ origin, data = Guayana_spread_REL_t, permutations = 999)
##           Df SumOfSqs      R2     F Pr(>F)  
## origin     1    0.581 0.01657 2.055  0.068 .
## Residual 122   34.494 0.98343               
## Total    123   35.075 1.00000               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5.6 Adonis BAC biocide

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_BAC_Biocide_t[, 1:109] ~ origin, data = Guayana_spread_REL_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)
## origin     1    0.291 0.00848 1.0438  0.335
## Residual 122   33.993 0.99152              
## Total    123   34.284 1.00000

6 Different genes stats

6.1 Quantity of distinct genes by origin

6.2 Mean of distinct genes by origin per person

6.3 Proportion unique genes

6.4 Proportion unique genes per person

6.5 Total unique genes per person

6.6 genes per million

6.7 Rarefaction

6.8 Abundance vs Depth

7 Tables of Genes

7.1 Common genes ABR

7.2 not in common genes ABR

7.3 Common genes BAC

7.4 not in common genes BAC

7.5 Common genes REL

7.6 not in common genes REL

7.7 Common genes all

7.8 not in common genes

8 Core genes

8.1 Core genes all

tabla_core <- Tabla_inicial %>% filter(Treatment!="Treat")%>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 124)
datatable(tabla_core, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

8.2 Core genes europe

tabla_core_eur <- Tabla_inicial %>% filter(Treatment=="NO-Treat") %>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 29)
datatable(tabla_core_eur, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

8.3 Core genes guayana

tabla_core_guayana <- Tabla_inicial %>% filter(Treatment=="guayana") %>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 95)
datatable(tabla_core_guayana, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

9 Summary of genes

9.1 summary of gene distribution

ABR_common_genes <- tabla_para_teresa_ABR %>%  group_by(Label) %>% summarise(number = n())
BAC_common_genes <- tabla_para_teresa_BAC %>%  group_by(Label) %>% summarise(number = n())
REL_common_genes <- tabla_para_teresa_REL %>%  group_by(Label) %>% summarise(number = n())

ABR_notcommon_genes <- anti_tabla_para_teresa_ABR %>%  group_by(Label) %>% summarise(number = n())
BAC_notcommon_genes <- anti_tabla_para_teresa_BAC %>%  group_by(Label) %>% summarise(number = n())
REL_notcommon_genes <- anti_tabla_para_teresa_REL %>%  group_by(Label) %>% summarise(number = n())

Tipo <- c('ABR','BAC','REL')
aaEur <- c(109,39,76)
All <- c(155,167,182)
Gui <- c(79,26,86)
mix_num_genes <- data.frame( Tipo, aaEur,All,Gui)
mix_num_genes %>% pivot_longer(cols=aaEur:Gui, names_to = "cosa", values_to = "value")%>% 
  ggplot(aes(x = cosa, y = value, fill = Tipo))+geom_col(position = "dodge")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

# Tipo <- c('ABR','BAC','REL')
# aaEur <- c(10,5,2)
# All <- c(10,2,0)
# Gui <- c(14,2,8)
# core_num_genes <- data.frame( Tipo, aaEur,All,Gui)
# 
# core_num_genes %>% pivot_longer(cols=aaEur:Gui, names_to = "cosa", values_to = "value")%>% 
#   ggplot(aes(x = cosa, y = value, fill = Tipo))+geom_col(position = "dodge")+
#   theme_classic()+
#   theme(panel.background = element_blank(), panel.grid = element_blank(), 
#         aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
#         axis.text.x = element_text(angle = 45,  hjust=1))

9.2 Core genes

10 Hist of Genes

10.1 hist all genes

10.2 hist all genes europe_t0

10.3 hist all genes guayana

10.4 hist ABR

10.5 hist BAC

10.6 hist REL

10.7 hist ABR Guayana

10.8 hist BAC Guayana

10.9 hist REL Guayana

10.10 hist ABR Europe

10.11 hist BAC Europe

10.12 hist REL Europe

11 Resistance genes diversity in population

11.1 ABR diversity

11.2 REL diversity

11.3 BAC diversity

12 Resistance genes diversity in cohort and origin

12.1 ABR diversity in origin

12.2 REL diversity in origin

12.3 BAC diversity in origin

13 Total freq of Resistance genes

13.1 ABR freq by origin

13.2 depth of BAC by origin

13.3 REL freq by origin

14 Total depth of Resistance genes

14.1 depth of ABR by origin

14.2 depth of BAC by origin

14.3 depth of REL by origin

15 CHI-SQUARE PERCENTAGES

#Amerindians <- Tabla_inicial %>%select(origin) %>%  filter(origin == "guayana")

tabla_america <- Tabla_inicial %>% filter(origin == "guayana") %>% group_by(label) %>% distinct(template,label, .keep_all = TRUE) %>% ungroup %>%
  group_by(template) %>% summarize(suma = n())

tabla_europa <- Tabla_inicial %>% filter(Treatment == 'NO-Treat' ) %>% group_by(label) %>% distinct(template,label, .keep_all = TRUE) %>% ungroup %>% 
  group_by(template) %>% summarize(suma = n())

tabla_chi <- full_join(tabla_europa, tabla_america, by= "template") 
colnames(tabla_chi) <- c("template","europe_P", "america_P" )

tabla_chi[is.na(tabla_chi)] <- 0
tabla_chi_filt <- tabla_chi %>% mutate(europe_N = 29 - europe_P ) %>% mutate(america_N = 95 - america_P )#%>% filter(europe_P > 2 | america_P > 9 )

chi_pvalue <- tabla_chi_filt %>%
  rowwise() %>%
  mutate(
    p.value = fisher.test(matrix(c(europe_P, europe_N, america_P, america_N), nrow = 2))$p.value,
    odds_ratio = fisher.test(matrix(c(europe_P, europe_N, america_P, america_N), nrow = 2))$estimate,
  )

colnames(chi_pvalue) <- c("template","europe_P","america_P","europe_N","america_N","p_value", "odds_ratio")
chi_pvalue <- as.data.frame(chi_pvalue)
chi_pvalue.adj <-chi_pvalue %>% mutate(padj= p.adjust(chi_pvalue$p_value, method = "BH")) #%>% filter(padj<0.05)

chi_pvalue.adj <- chi_pvalue.adj %>% mutate(america_odds = europe_P*america_N) %>% mutate(europe_odds = america_P*europe_N)
chi_pvalue.adj <- chi_pvalue.adj %>% mutate(result1= america_odds/europe_odds) %>% mutate(result2 = europe_odds/america_odds)
chi_pvalue.adj <- chi_pvalue.adj %>% mutate(porcentaje_eur=europe_P/(europe_P+europe_N)) %>% mutate(porcentaje_amer=america_P/(america_P+america_N))
chi_pvalue.adj[is.na(chi_pvalue.adj)] <- 0
chi_pvalue.adj$odds_ratio <- ifelse(chi_pvalue.adj$odds_ratio < 1, -1/chi_pvalue.adj$odds_ratio, chi_pvalue.adj$odds_ratio )
chi_pvalue.adj$direction <- ifelse(chi_pvalue.adj$odds_ratio < 0, 'america', 'europe')

chi_pvalue.adj.plot <- chi_pvalue.adj
chi_pvalue.adj.plot$fisher <- ifelse(chi_pvalue.adj.plot$padj<0.05, "yes", "no" ) 
chi_pvalue.adj.plot$label <- ifelse(chi_pvalue.adj.plot$padj<0.000000000000001, chi_pvalue.adj.plot$template, "" ) 

chi_pvalue.adj.plot %>% ggplot(aes(porcentaje_eur, porcentaje_amer, color= fisher))+geom_point()+
  geom_text_repel(aes(label=label),hjust = 0, nudge_x = 0.05, max.overlaps = 99)+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

chi_pvalue.adj <-chi_pvalue.adj %>% filter(padj<0.05)


chi_pvalue.adj$database = "ABR" 
chi_pvalue.adj$database[grepl("^BAC",chi_pvalue.adj$template)] = "BAC" 
chi_pvalue.adj$database[grepl("^MOB",chi_pvalue.adj$template)] = "REL" 

# chi_pvalue_rel <- chi_pvalue.adj %>% filter(str_detect(template, "^MOB"))
# chi_pvalue_rel_short <- chi_pvalue.adj %>% filter(str_detect(template, "^MOB"))
# chi_pvalue_rel_short$template <-  sub( "_.*" ," ", chi_pvalue_rel_short$template)
chi_pvalue_rel <- chi_pvalue.adj %>%filter(database == "REl")
chi_pvalue_rel_short <- chi_pvalue.adj %>%filter(database == "REL")
chi_pvalue_rel_short$template <-  sub( "_.*" ," ", chi_pvalue_rel_short$template)


chi_pvalue_abr <- chi_pvalue.adj %>% filter(database == "ABR")
chi_pvalue_abr_short <- chi_pvalue.adj %>%filter(database == "ABR")
#no se puede simplificar sin perder info
# chi_pvalue_abr_short$template <-  sub( ":.*" ," ", chi_pvalue_abr_short$template)
# chi_pvalue_abr_short$template <-  sub( "(_[^_]+)_.*" ,"\\1", chi_pvalue_abr_short$template)
# chi_pvalue_abr_short$template  = sapply(chi_pvalue_abr_short$template , function(x) paste(strsplit(x, "_")[[1]][1:2], collapse = '_'))



chi_pvalue_bac <- chi_pvalue.adj %>% filter(database == "BAC")
# chi_pvalue_bac_short <- chi_pvalue.adj %>% filter(database == "BAC") %>% left_join(Bac_met_db, by = 'template')
# chi_pvalue_bac_short$template <-  sub( "\\|+" ,"-", chi_pvalue_bac_short$template)
# chi_pvalue_bac_short$template <-  sub( "(\\|[^\\|]+)\\|.*" ,"", chi_pvalue_bac_short$template)

15.1 ABR Percentages by padj 2 colors

chi_pvalue_abr_short_longzzz <- chi_pvalue_abr_short %>% 
  select(template,porcentaje_eur,porcentaje_amer) %>%
  melt( id.vars = 'template')

chi_pvalue_abr_short_longzzz$value <- ifelse(chi_pvalue_abr_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_abr_short_longzzz$value*-1, chi_pvalue_abr_short_longzzz$value)

joinedabr <- full_join(chi_pvalue_abr_short_longzzz, chi_pvalue_abr_short, by='template')

joinedabrz <- joinedabr %>% arrange(desc(value))

americcc <- joinedabrz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedabrz %>% filter(variable == 'porcentaje_eur')

ggplot() +
  geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "D") +
  new_scale_fill() +
  geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "C")+
  coord_flip()

15.2 BAC Percentages by padj 2 colors

chi_pvalue_BAC_short_longzzz <- chi_pvalue_bac %>% 
  select(template,porcentaje_eur,porcentaje_amer) %>%
  melt( id.vars = 'template')

chi_pvalue_BAC_short_longzzz$value <- ifelse(chi_pvalue_BAC_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_BAC_short_longzzz$value*-1, chi_pvalue_BAC_short_longzzz$value)

chi_pvalue_rel_short <- chi_pvalue_rel_short %>% arrange(desc(odds_ratio))
joinedBAC <- full_join(chi_pvalue_BAC_short_longzzz, chi_pvalue_bac, by='template')
joinedBACz <- joinedBAC %>% arrange(desc(value))

americcc <- joinedBACz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedBACz %>% filter(variable == 'porcentaje_eur')

ggplot() +
  geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "D") +
  new_scale_fill() +
  geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "C")+
  coord_flip()

15.3 REL Percentages by padj 2 colors

chi_pvalue_REL_short_longzzz <- chi_pvalue_rel_short %>% 
  select(template,porcentaje_eur,porcentaje_amer) %>%
  melt( id.vars = 'template')

chi_pvalue_REL_short_longzzz$value <- ifelse(chi_pvalue_REL_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_REL_short_longzzz$value*-1, chi_pvalue_REL_short_longzzz$value)

chi_pvalue_rel_short <- chi_pvalue_rel_short %>% arrange(desc(odds_ratio))
joinedREL <- full_join(chi_pvalue_REL_short_longzzz, chi_pvalue_rel_short, by='template')

joinedRELz <- joinedREL %>% arrange(desc(value))

americcc <- joinedRELz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedRELz %>% filter(variable == 'porcentaje_eur')


ggplot() +
  geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "D") +
  new_scale_fill() +
  geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "C")+
  coord_flip()

16 Lefse

16.1 Lefse general

16.2 Lefse ABR

plotLDA(res_origin_ABR_LEFSE,group=c("europa","guayana"),lda=3,pvalue=0.05)

16.3 Lefse BAC

plotLDA(res_origin_BAC_LEFSE,group=c("europa","guayana"),lda=4,pvalue=0.05)

16.4 Lefse REL

plotLDA(res_origin_REL_LEFSE,group=c("europa","guayana"),lda=5,pvalue=0.05)

17 HEATMAPS

17.1 ALL Heatmap

pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

17.2 ABR Heatmap

pheatmap(log(heat_padj_abr+1), clustering_method = "ward.D2")

Heatmap of the Antimicrobial resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.

17.3 REL Heatmap

pheatmap(log(heat_padj_rel+1), clustering_method = "ward.D2") 

Heatmap of the Relaxases resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.

17.4 BAC Heatmap

# pheatmap(log(theat_padj_bac+1), clustering_method = "ward.D2")
pheatmap(log(heat_padj_bac+1), clustering_method = "ward.D2")

Heatmap of the Biocides and metals resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.

17.5 ALL Heatmap ALL

pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

17.6 ABR Heatmap ALL

pheatmap(log(heat_padj_abr+1), clustering_method = "ward.D2")

17.7 ALL Heatmap LEFSE

pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

17.8 ABR Heatmap LEFSE

pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

17.9 BAC Heatmap LEFSE

pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

## REL Heatmap LEFSE

pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

17.10 genes mas importantes

res_origin_LEFSE_changed <- res_origin_LEFSE %>% filter(LDAscore >= 2)
colnames(res_origin_LEFSE_changed)[2] <- "genes"
chi_pvalue.adj_changed <- chi_pvalue.adj
colnames(chi_pvalue.adj_changed)[1] <- "genes"
important_genes <- inner_join(chi_pvalue.adj_changed, res_origin_LEFSE_changed, by = "genes")
important_genes <- inner_join(res_filt, chi_pvalue.adj_changed, by = "genes")
important_genes <- inner_join(res_filt, res_origin_LEFSE_changed, by = "genes")
important_genes <- inner_join(important_genes, chi_pvalue.adj_changed, by = "genes")