1 Load data

2 Rarefaction

## [1] 327839

3 Diversity index

3.1 Shapiro test

diver_gral <- diver_gral %>%
  pivot_longer(cols=chao_gral:dominance.simpson_BACBiocide, names_to = "cosa", values_to = "value") %>% 
  separate(cosa, into=c("index", "database2"), sep="_")

shapiro <- diver_gral %>%
  group_by(database2, index, origin) %>%
  do(tidy(shapiro.test(.$value)))
datatable(shapiro, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

3.2 Wilcox Test

wilcox <- diver_gral %>% 
  group_by(database2, index) %>% 
  do(tidy(wilcox.test(.$value~.$origin)))
datatable(wilcox, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )
student <- diver_gral %>%
  group_by(database2, index) %>%
  do(tidy(t.test(.$value~.$origin)))
datatable(student, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

3.3 Shannon Index

3.4 Chao index

diver_gral %>% filter(index== "chao") %>% 
  ggplot(aes(x=origin, y = value,fill=origin)) +
  geom_violin()+
  geom_boxplot(width=.2) +
  facet_wrap(.~database2)+
  ggtitle ("chao index") +xlab("Cohort origin") +ylab("chao Index")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

3.5 Dominance simpson

diver_gral %>% filter(index== "dominance.simpson") %>% 
  ggplot(aes(x=origin, y = value,fill=origin)) +
  geom_violin()+
  geom_boxplot(width=.2) +
  facet_wrap(.~database2)+
  ggtitle ("dominance simpson diversity") +xlab("Cohort origin") +ylab("simpson Index")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

3.6 Eveness simpson

diver_gral %>% filter(index== "evenness.simpson") %>% 
  ggplot(aes(x=origin, y = value,fill=origin)) +
  geom_violin()+
  geom_boxplot(width=.2) +
  facet_wrap(.~database2)+
  ggtitle ("eveness simpson diversity") +xlab("Cohort origin") +ylab("simpson Index")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

4 PCA

4.1 PCA gral by Origin

as.data.frame(PCA$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 47.29%") +ylab("PC2 21.43%")

4.2 PCA ABR by origin

as.data.frame(PCA_ABR$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 70.76%") +ylab("PC2 6.89%")

4.3 PCA BAC by origin

as.data.frame(PCA_BAC$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 51.78%") +ylab("PC2 10.88%")

4.4 PCA REL by origin

as.data.frame(PCA_REL$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 75.26%") +ylab("PC2 12.94%")

4.5 PCA BAC Metals by origin

as.data.frame(PCA_BAC_Metal$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 50.87%") +ylab("PC2 10.29%")

4.6 PCA BAC Biocide by origin

as.data.frame(PCA_BAC_Biocide$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 53.32%") +ylab("PC2 10.54%")

5 UMAP

HE5, HE152

Guayana_tsne_spearman <- as.matrix(Guayana_tsne_spearman)
Guayana_tsne_spearman_REL <- as.matrix(Guayana_tsne_spearman_REL)
Guayana_tsne_spearman_ABR <- as.matrix(Guayana_tsne_spearman_ABR)
Guayana_tsne_spearman_BAC <- as.matrix(Guayana_tsne_spearman_BAC)
Guayana_tsne_spearman_BAC_Metal = get_dist(Guayana_spread_BAC_Metal, method = "spearman")
Guayana_tsne_spearman_BAC_Biocide = get_dist(Guayana_spread_BAC_Biocide, method = "spearman")

umap_plot <- umap(Guayana_tsne_spearman, input="dist")
umap_plot_REL <- umap(Guayana_tsne_spearman_REL)
umap_plot_ABR <- umap(Guayana_tsne_spearman_ABR)
umap_plot_BAC <- umap(Guayana_tsne_spearman_BAC)
umap_plot_BAC_Metal <- umap(Guayana_spread_BAC_Metal)
umap_plot_BAC_Biocide <- umap(Guayana_spread_BAC_Biocide)

umap_plot <- as.data.frame(umap_plot$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_REL <- as.data.frame(umap_plot_REL$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_ABR <- as.data.frame(umap_plot_ABR$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC <- as.data.frame(umap_plot_BAC$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC_Metal <- as.data.frame(umap_plot_BAC_Metal$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC_Biocide <- as.data.frame(umap_plot_BAC_Biocide$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)

5.1 UMAP general by origin

5.2 UMAP ABR by origin

umap_plot_ABR %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP ABR")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

5.3 UMAP BAC by origin

umap_plot_BAC %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP BAC")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

5.4 UMAP REL by origin

umap_plot_REL %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP REL")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

5.5 UMAP BAC Metal by origin

umap_plot_BAC_Metal %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP BAC Metal")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

5.6 UMAP BAC Biocide by origin

umap_plot_BAC_Biocide %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP BAC biocide")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

6 T-SNE

6.1 T-SNE general by origin

ggplot(tsne_spearman_30_plot) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE")

6.2 T-SNE ABR by origin

ggplot(tsne_spearman_30_plot_ABR) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE ABR")

6.3 T-SNE BAC by origin

ggplot(tsne_spearman_30_plot_BAC) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE BAC")

6.4 T-SNE REL by origin

ggplot(tsne_spearman_30_plot_REL) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE REL")

6.5 T-SNE Metal by origin

ggplot(tsne_spearman_30_plot_BAC_Metal) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE metal")

6.6 T-SNE biocide by origin

ggplot(tsne_spearman_30_plot_BAC_Biocide) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE biocide")

7 Adonis

7.1 Adonis all

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_t[, 1:5346] ~ origin, data = Guayana_spread_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1   2.0268 0.09333 12.559  0.001 ***
## Residual 122  19.6891 0.90667                  
## Total    123  21.7158 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.2 Adonis ABR

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_ABR_t[, 1:367] ~ origin, data = Guayana_spread_ABR_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1   1.7495 0.12136 16.851  0.001 ***
## Residual 122  12.6664 0.87864                  
## Total    123  14.4159 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.3 Adonis BAC

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_BAC_t[, 1:4576] ~ origin, data = Guayana_spread_BAC_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1    1.843 0.03401 4.2953  0.001 ***
## Residual 122   52.355 0.96599                  
## Total    123   54.198 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.4 Adonis REL

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_REL_t[, 1:403] ~ origin, data = Guayana_spread_REL_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1   3.8006 0.14208 20.205  0.001 ***
## Residual 122  22.9487 0.85792                  
## Total    123  26.7493 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.5 Adonis BAC metal

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_BAC_Metal_t[, 1:3027] ~ origin, data = Guayana_spread_BAC_Metal_t, permutations = 999)
##           Df SumOfSqs      R2     F Pr(>F)    
## origin     1    1.209 0.02248 2.805  0.001 ***
## Residual 122   52.578 0.97752                 
## Total    123   53.787 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.6 Adonis BAC biocide

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_BAC_Biocide_t[, 1:2068] ~ origin, data = Guayana_spread_BAC_Biocide_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1    1.979 0.03645 4.6157  0.001 ***
## Residual 122   52.302 0.96355                  
## Total    123   54.281 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

8 Different genes stats

8.1 Quantity of distinct genes by origin

8.2 Mean of distinct genes by origin per person

8.3 Proportion unique genes

8.4 Proportion unique genes per person

8.5 Total unique genes per person

8.6 genes per million

8.7 Rarefaction

8.8 Abundance vs Depth

9 Tables of Genes

9.1 Common genes ABR

9.2 not in common genes ABR

9.3 Common genes BAC

9.4 not in common genes BAC

9.5 Common genes REL

9.6 not in common genes REL

9.7 Common genes all

9.8 not in common genes

10 Core genes

10.1 Core genes all

tabla_core <- Tabla_inicial%>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 124)
datatable(tabla_core, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

10.2 Core genes europe

tabla_core_eur <- Tabla_inicial %>% filter(origin=="europa") %>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 29)
datatable(tabla_core_eur, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

10.3 Core genes guayana

tabla_core_guayana <- Tabla_inicial %>% filter(origin=="guayana") %>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 95)
datatable(tabla_core_guayana, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

11 Summary of genes

11.1 summary of gene distribution

ABR_common_genes <- tabla_para_teresa_ABR %>%  group_by(Label) %>% summarise(number = n())
# BAC_common_genes<- tabla_para_teresa_BAC  %>%  group_by(Label, template_gene) %>% summarise(number = n())
BAC_common_genes<-tabla_para_teresa_BAC %>% distinct(template,Label,TotalFreq) %>%  group_by(Label) %>% summarise(number = n())
REL_common_genes <- tabla_para_teresa_REL %>%  group_by(Label) %>% summarise(number = n())

ABR_notcommon_genes <- anti_tabla_para_teresa_ABR %>%  group_by(Label) %>% summarise(number = n())
# BAC_notcommon_genes <- anti_tabla_para_teresa_BAC %>%  group_by(Label) %>% summarise(number = n())
BAC_notcommon_genes <- anti_tabla_para_teresa_BAC %>%distinct(template,Label,TotalFreq) %>%  group_by(Label) %>% summarise(number = n())
REL_notcommon_genes <- anti_tabla_para_teresa_REL %>%  group_by(Label) %>% summarise(number = n())

Tipo <- c('ABR','BAC','REL')
aaEur <- c(ABR_notcommon_genes$number[1],BAC_notcommon_genes$number[1],REL_notcommon_genes$number[1])
All <- c( ABR_common_genes$number[1],BAC_common_genes$number[1],REL_common_genes$number[1])
Gui <- c(ABR_notcommon_genes$number[2],BAC_notcommon_genes$number[2],REL_notcommon_genes$number[2])
mix_num_genes <- data.frame( Tipo, aaEur,All,Gui)
mix_num_genes %>% pivot_longer(cols=aaEur:Gui, names_to = "cosa", values_to = "value")%>% 
  ggplot(aes(x = cosa, y = value, fill = Tipo))+geom_col(position = "dodge")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

# Tipo <- c('ABR','BAC','REL')
# aaEur <- c(10,5,2)
# All <- c(10,2,0)
# Gui <- c(14,2,8)
# core_num_genes <- data.frame( Tipo, aaEur,All,Gui)
# 
# core_num_genes %>% pivot_longer(cols=aaEur:Gui, names_to = "cosa", values_to = "value")%>% 
#   ggplot(aes(x = cosa, y = value, fill = Tipo))+geom_col(position = "dodge")+
#   theme_classic()+
#   theme(panel.background = element_blank(), panel.grid = element_blank(), 
#         aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
#         axis.text.x = element_text(angle = 45,  hjust=1))

11.2 Core genes

12 Hist of Genes

12.1 hist all genes

12.2 hist all genes europe_t0

12.3 hist all genes guayana

12.4 hist ABR

12.5 hist BAC

12.6 hist REL

12.7 hist ABR Guayana

12.8 hist BAC Guayana

12.9 hist REL Guayana

12.10 hist ABR Europe

12.11 hist BAC Europe

12.12 hist REL Europe

13 Resistance genes diversity in population

13.1 ABR diversity

13.2 REL diversity

13.3 BAC diversity

14 Resistance genes diversity in cohort and origin

14.1 ABR diversity in origin

14.2 REL diversity in origin

14.3 BAC diversity in origin

15 Total freq of Resistance genes

15.1 ABR freq by origin

15.2 depth of BAC by origin

15.3 REL freq by origin

16 Total depth of Resistance genes

16.1 depth of ABR by origin

16.2 depth of BAC by origin

16.3 depth of REL by origin

17 CHI-SQUARE PERCENTAGES

#Amerindians <- Tabla_inicial %>%select(origin) %>%  filter(origin == "guayana")

tabla_america <- Tabla_inicial %>% filter(origin == "guayana") %>% group_by(label) %>% distinct(template,label, .keep_all = TRUE) %>% ungroup %>%
  group_by(template) %>% summarize(suma = n())

tabla_europa <- Tabla_inicial %>% filter(origin == "europa") %>% group_by(label) %>% distinct(template,label, .keep_all = TRUE) %>% ungroup %>% 
  group_by(template) %>% summarize(suma = n())

tabla_chi <- full_join(tabla_europa, tabla_america, by= "template") 
colnames(tabla_chi) <- c("template","europe_P", "america_P" )

tabla_chi[is.na(tabla_chi)] <- 0
tabla_chi_filt <- tabla_chi %>% mutate(europe_N = 29 - europe_P ) %>% mutate(america_N = 95 - america_P )#%>% filter(europe_P > 2 | america_P > 9 )

chi_pvalue <- tabla_chi_filt %>%
  rowwise() %>%
  mutate(
    p.value = fisher.test(matrix(c(europe_P, europe_N, america_P, america_N), nrow = 2))$p.value,
    odds_ratio = fisher.test(matrix(c(europe_P, europe_N, america_P, america_N), nrow = 2))$estimate,
  )

colnames(chi_pvalue) <- c("template","europe_P","america_P","europe_N","america_N","p_value", "odds_ratio")
chi_pvalue <- as.data.frame(chi_pvalue)
chi_pvalue.adj <-chi_pvalue %>% mutate(padj= p.adjust(chi_pvalue$p_value, method = "BH")) #%>% filter(padj<0.05)

chi_pvalue.adj <- chi_pvalue.adj %>% mutate(america_odds = europe_P*america_N) %>% mutate(europe_odds = america_P*europe_N)
chi_pvalue.adj <- chi_pvalue.adj %>% mutate(result1= america_odds/europe_odds) %>% mutate(result2 = europe_odds/america_odds)
chi_pvalue.adj <- chi_pvalue.adj %>% mutate(porcentaje_eur=europe_P/(europe_P+europe_N)) %>% mutate(porcentaje_amer=america_P/(america_P+america_N))
chi_pvalue.adj[is.na(chi_pvalue.adj)] <- 0
chi_pvalue.adj$odds_ratio <- ifelse(chi_pvalue.adj$odds_ratio < 1, -1/chi_pvalue.adj$odds_ratio, chi_pvalue.adj$odds_ratio )
chi_pvalue.adj$direction <- ifelse(chi_pvalue.adj$odds_ratio < 0, 'america', 'europe')

chi_pvalue.adj.plot <- chi_pvalue.adj
chi_pvalue.adj.plot$fisher <- ifelse(chi_pvalue.adj.plot$padj<0.05, "yes", "no" ) 
chi_pvalue.adj.plot$label <- ifelse(chi_pvalue.adj.plot$padj<0.000000000000001, chi_pvalue.adj.plot$template, "" ) 

chi_pvalue.adj.plot %>% ggplot(aes(porcentaje_eur, porcentaje_amer, color= fisher))+geom_point()+
  geom_text_repel(aes(label=label),hjust = 0, nudge_x = 0.05, max.overlaps = 99)+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

chi_pvalue.adj <-chi_pvalue.adj %>% filter(padj<0.05)
tmpp <- Tabla_inicial %>% select(template, database) %>% distinct()

chi_pvalue.adj <- left_join(chi_pvalue.adj, tmpp, by = "template")

# chi_pvalue.adj$database = "ABR" 
# chi_pvalue.adj$database[grepl("^BAC",chi_pvalue.adj$template)] = "BAC" 
# chi_pvalue.adj$database[grepl("^MOB",chi_pvalue.adj$template)] = "REL" 

# chi_pvalue_rel <- chi_pvalue.adj %>% filter(str_detect(template, "^MOB"))
# chi_pvalue_rel_short <- chi_pvalue.adj %>% filter(str_detect(template, "^MOB"))
# chi_pvalue_rel_short$template <-  sub( "_.*" ," ", chi_pvalue_rel_short$template)
chi_pvalue_rel <- chi_pvalue.adj %>%filter(database == "REl")
chi_pvalue_rel_short <- chi_pvalue.adj %>%filter(database == "REL")
chi_pvalue_rel_short$template <-  sub( "_.*" ," ", chi_pvalue_rel_short$template)


chi_pvalue_abr <- chi_pvalue.adj %>% filter(database == "ABR")
chi_pvalue_abr_short <- chi_pvalue.adj %>%filter(database == "ABR")
#no se puede simplificar sin perder info
# chi_pvalue_abr_short$template <-  sub( ":.*" ," ", chi_pvalue_abr_short$template)
# chi_pvalue_abr_short$template <-  sub( "(_[^_]+)_.*" ,"\\1", chi_pvalue_abr_short$template)
# chi_pvalue_abr_short$template  = sapply(chi_pvalue_abr_short$template , function(x) paste(strsplit(x, "_")[[1]][1:2], collapse = '_'))



chi_pvalue_bac <- chi_pvalue.adj %>% filter(database == "BAC")
# chi_pvalue_bac_short <- chi_pvalue.adj %>% filter(database == "BAC") %>% left_join(Bac_met_db, by = 'template')
# chi_pvalue_bac_short$template <-  sub( "\\|+" ,"-", chi_pvalue_bac_short$template)
# chi_pvalue_bac_short$template <-  sub( "(\\|[^\\|]+)\\|.*" ,"", chi_pvalue_bac_short$template)

17.1 ABR Percentages by padj 2 colors

chi_pvalue_abr_short_longzzz <- chi_pvalue_abr_short %>% 
  select(template,porcentaje_eur,porcentaje_amer) %>%
  melt( id.vars = 'template')

chi_pvalue_abr_short_longzzz$value <- ifelse(chi_pvalue_abr_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_abr_short_longzzz$value*-1, chi_pvalue_abr_short_longzzz$value)

joinedabr <- full_join(chi_pvalue_abr_short_longzzz, chi_pvalue_abr_short, by='template')

joinedabrz <- joinedabr %>% arrange(desc(value))

americcc <- joinedabrz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedabrz %>% filter(variable == 'porcentaje_eur')

ggplot() +
  geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "D") +
  new_scale_fill() +
  geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "C")+
  coord_flip()

17.2 BAC Percentages by padj 2 colors

chi_pvalue_BAC_short_longzzz <- chi_pvalue_bac %>% 
  select(template,porcentaje_eur,porcentaje_amer) %>%
  melt( id.vars = 'template')

chi_pvalue_BAC_short_longzzz$value <- ifelse(chi_pvalue_BAC_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_BAC_short_longzzz$value*-1, chi_pvalue_BAC_short_longzzz$value)

chi_pvalue_rel_short <- chi_pvalue_rel_short %>% arrange(desc(odds_ratio))
joinedBAC <- full_join(chi_pvalue_BAC_short_longzzz, chi_pvalue_bac, by='template')
joinedBACz <- joinedBAC %>% arrange(desc(value))

americcc <- joinedBACz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedBACz %>% filter(variable == 'porcentaje_eur')

ggplot() +
  geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "D") +
  new_scale_fill() +
  geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "C")+
  coord_flip()

17.3 REL Percentages by padj 2 colors

chi_pvalue_REL_short_longzzz <- chi_pvalue_rel_short %>% 
  select(template,porcentaje_eur,porcentaje_amer) %>%
  melt( id.vars = 'template')

chi_pvalue_REL_short_longzzz$value <- ifelse(chi_pvalue_REL_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_REL_short_longzzz$value*-1, chi_pvalue_REL_short_longzzz$value)

chi_pvalue_rel_short <- chi_pvalue_rel_short %>% arrange(desc(odds_ratio))
joinedREL <- full_join(chi_pvalue_REL_short_longzzz, chi_pvalue_rel_short, by='template')

joinedRELz <- joinedREL %>% arrange(desc(value))

americcc <- joinedRELz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedRELz %>% filter(variable == 'porcentaje_eur')


ggplot() +
  geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "D") +
  new_scale_fill() +
  geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "C")+
  coord_flip()

18 Lefse

18.1 Lefse general

18.2 Lefse ABR

plotLDA(res_origin_ABR_LEFSE,group=c("europa","guayana"),lda=2,pvalue=0.05)

18.3 Lefse BAC

plotLDA(res_origin_BAC_LEFSE,group=c("europa","guayana"),lda=5,pvalue=0.05)

18.4 Lefse REL

plotLDA(res_origin_REL_LEFSE,group=c("europa","guayana"),lda=5,pvalue=0.05)

19 HEATMAPS

19.1 ALL Heatmap

pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

19.2 ABR Heatmap

pheatmap(log(heat_padj_abr+1), clustering_method = "ward.D2")

Heatmap of the Antimicrobial resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.

19.3 REL Heatmap

pheatmap(log(heat_padj_rel+1), clustering_method = "ward.D2") 

Heatmap of the Relaxases resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.

Heatmap of the Biocides and metals resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.

19.4 ALL Heatmap ALL

pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

20 Metadata

20.1 Metadata

metadata_all_relevant$lib <- "ALL"
metadata_abr_relevant$lib <- "AzBR"
metadata_bac_relevant$lib <- "BAC"
metadata_rel_relevant$lib <- "REL"
metadata_bac_metal_relevant$lib <- "BAC_metal"
metadata_bac_biocide_relevant$lib <- "BAC_biocide"

metameta <- rbind(metadata_abr_relevant, metadata_all_relevant,metadata_bac_relevant,metadata_rel_relevant, metadata_bac_metal_relevant, metadata_bac_biocide_relevant)

metameta %>% ggplot(aes(x=lib, y=term)) + geom_tile(aes(fill=p.value))+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1,strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

20.2 Metadata binaries

metadata_all_bin_relevant$lib <- "ALL"
metadata_abr_bin_relevant$lib <- "ABR"
metadata_bac_bin_relevant$lib <- "BAC"
metadata_rel_bin_relevant$lib <- "REL"
metadata_bac_biocide_bin_relevant$lib <- "BAC_biocide"   
metadata_bac_metal_bin_relevant$lib <- "BAC_metal"

metameta_bin <- rbind(metadata_all_bin_relevant, metadata_abr_bin_relevant,metadata_bac_bin_relevant,metadata_rel_bin_relevant, metadata_bac_biocide_bin_relevant, metadata_bac_metal_bin_relevant)

metameta_bin %>% ggplot(aes(x=lib, y=term)) + geom_tile(aes(fill=p.value))+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

20.3 metadata suma

meta_all<- meta_all  %>% filter(Df > 0)%>%  filter(`Pr(>F)` < 0.05) %>% rownames_to_column(var = "term")
meta_abr<- meta_abr %>% filter(Df > 0) %>%  filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_bac<- meta_bac %>% filter(Df > 0) %>%  filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_rel<- meta_rel %>% filter(Df > 0) %>%  filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_bac_metal<- meta_bac_metal %>% filter(Df > 0) %>%  filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_bac_biocide<- meta_bac_biocide %>% filter(Df > 0) %>%  filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")

meta_all$lib <- "ALL"
meta_abr$lib <- "ABR"
meta_bac$lib <- "BAC"
meta_rel$lib <- "REL"
meta_bac_metal$lib <- "BAC_biocide"   
meta_bac_biocide$lib <- "BAC_biocide"   

meta <- rbind(meta_all, meta_abr,meta_bac,meta_rel, meta_bac_metal, meta_bac_biocide)

meta %>% ggplot(aes(x=lib, y=term)) + geom_tile(aes(fill=`Pr(>F)`))+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))