1 Load Rarefaction

2 Diversity index

2.1 Shapiro test

diver_gral <- diver_gral %>%
  pivot_longer(cols=chao_gral:dominance.simpson_BACBiocide, names_to = "cosa", values_to = "value") %>% 
  separate(cosa, into=c("index", "database2"), sep="_")

shapiro <- diver_gral %>%
  group_by(database2, index, origin) %>%
  do(tidy(shapiro.test(.$value)))
datatable(shapiro, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

2.2 Wilcox Test

wilcox <- diver_gral %>% 
  group_by(database2, index) %>% 
  do(tidy(wilcox.test(.$value~.$origin)))
datatable(wilcox, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

student <- diver_gral %>%
  group_by(database2, index) %>%
  do(tidy(t.test(.$value~.$origin)))
datatable(student, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

2.3 Shannon Index

2.4 Chao index

diver_gral %>% filter(index== "chao") %>% 
  ggplot(aes(x=origin, y = value,fill=origin)) +
  geom_violin()+
  geom_boxplot(width=.2) +
  facet_wrap(.~database2)+
  ggtitle ("chao index") +xlab("Cohort origin") +ylab("chao Index")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

diver_gral %>%  filter(index == "chao") %>% group_by(origin, database2)%>% summarise(media = mean(value)) %>% 
  datatable(rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

## `summarise()` has grouped output by 'origin'. You can override using the
## `.groups` argument.

grouped_ggbetweenstats(
  data  = diver_gral %>% filter(index== "chao"),
  x     = origin,
  y     = value,
  grouping.var = database2)

2.5 Dominance simpson

diver_gral %>% filter(index== "dominance.simpson") %>% 
  ggplot(aes(x=origin, y = value,fill=origin)) +
  geom_violin()+
  geom_boxplot(width=.2) +
  facet_wrap(.~database2)+
  ggtitle ("dominance simpson diversity") +xlab("Cohort origin") +ylab("simpson Index")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

diver_gral %>%  filter(index == "dominance.simpson") %>% group_by(origin, database2)%>% summarise(media = mean(value)) %>% 
  datatable(rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T))

## `summarise()` has grouped output by 'origin'. You can override using the
## `.groups` argument.

grouped_ggbetweenstats(
  data  = diver_gral %>% filter(index== "dominance.simpson"),
  x     = origin,
  y     = value,
  grouping.var = database2)

2.6 Eveness simpson

diver_gral %>% filter(index== "evenness.simpson") %>% 
  ggplot(aes(x=origin, y = value,fill=origin)) +
  geom_violin()+
  geom_boxplot(width=.2) +
  facet_wrap(.~database2)+
  ggtitle ("eveness simpson diversity") +xlab("Cohort origin") +ylab("simpson Index")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

diver_gral %>%  filter(index == "evenness.simpson") %>% group_by(origin, database2)%>% summarise(media = mean(value)) %>% 
  datatable(rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T))

## `summarise()` has grouped output by 'origin'. You can override using the
## `.groups` argument.

grouped_ggbetweenstats(
  data  = diver_gral %>% filter(index== "evenness.simpson"),
  x     = origin,
  y     = value,
  grouping.var = database2)

3 PCA

3.1 PCA gral by Origin

as.data.frame(PCA$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 47.29%") +ylab("PC2 21.43%")

3.2 PCA ABR by origin

as.data.frame(PCA_ABR$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 70.76%") +ylab("PC2 6.89%")

3.3 PCA BAC by origin

as.data.frame(PCA_BAC$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 51.78%") +ylab("PC2 10.88%")

3.4 PCA REL by origin

as.data.frame(PCA_REL$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 75.26%") +ylab("PC2 12.94%")

3.5 PCA BAC Metals by origin

as.data.frame(PCA_BAC_Metal$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 50.87%") +ylab("PC2 10.29%")

3.6 PCA BAC Biocide by origin

as.data.frame(PCA_BAC_Biocide$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
  select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 53.32%") +ylab("PC2 10.54%")

4 UMAP

Guayana_tsne_spearman <- as.matrix(Guayana_tsne_spearman)
Guayana_tsne_spearman_REL <- as.matrix(Guayana_tsne_spearman_REL)
Guayana_tsne_spearman_ABR <- as.matrix(Guayana_tsne_spearman_ABR)
Guayana_tsne_spearman_BAC <- as.matrix(Guayana_tsne_spearman_BAC)
Guayana_tsne_spearman_BAC_Metal = get_dist(Guayana_spread_BAC_Metal, method = "spearman")
Guayana_tsne_spearman_BAC_Biocide = get_dist(Guayana_spread_BAC_Biocide, method = "spearman")


umap_plot <- umap(Guayana_tsne_spearman, input="dist")
umap_plot_REL <- umap(Guayana_tsne_spearman_REL, input="dist")
umap_plot_ABR <- umap(Guayana_tsne_spearman_ABR, input="dist")
umap_plot_BAC <- umap(Guayana_tsne_spearman_BAC, input="dist")
umap_plot_BAC_Metal <- umap(Guayana_spread_BAC_Metal, input="dist")
umap_plot_BAC_Biocide <- umap(Guayana_spread_BAC_Biocide, input="dist")

umap_plot <- as.data.frame(umap_plot$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_REL <- as.data.frame(umap_plot_REL$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_ABR <- as.data.frame(umap_plot_ABR$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC <- as.data.frame(umap_plot_BAC$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC_Metal <- as.data.frame(umap_plot_BAC_Metal$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC_Biocide <- as.data.frame(umap_plot_BAC_Biocide$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)

4.1 UMAP general by origin

4.2 UMAP ABR by origin

umap_plot_ABR %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP ABR")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 0,  hjust=1))

4.3 UMAP BAC by origin

umap_plot_BAC %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP BAC")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

4.4 UMAP REL by origin

umap_plot_REL %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP REL")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

4.5 UMAP BAC Metal by origin

umap_plot_BAC_Metal %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP BAC Metal")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

4.6 UMAP BAC Biocide by origin

umap_plot_BAC_Biocide %>%
  select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
  ggtitle ("UMAP BAC biocide")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

5 T-SNE

5.1 T-SNE general by origin

ggplot(tsne_spearman_30_plot) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE")

5.2 T-SNE ABR by origin

ggplot(tsne_spearman_30_plot_ABR) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE ABR")

5.3 T-SNE BAC by origin

ggplot(tsne_spearman_30_plot_BAC) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE BAC")

5.4 T-SNE REL by origin

ggplot(tsne_spearman_30_plot_REL) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE REL")

5.5 T-SNE Metal by origin

ggplot(tsne_spearman_30_plot_BAC_Metal) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE metal")

5.6 T-SNE biocide by origin

ggplot(tsne_spearman_30_plot_BAC_Biocide) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE biocide")

6 Adonis

6.1 Adonis all

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_t[, 1:5199] ~ origin, data = Guayana_spread_t, permutations = 999)
##           Df SumOfSqs      R2     F Pr(>F)    
## origin     1    5.666 0.14151 20.11  0.001 ***
## Residual 122   34.373 0.85849                 
## Total    123   40.039 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6.2 Adonis ABR

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_ABR_t[, 1:367] ~ origin, data = Guayana_spread_ABR_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1   1.7155 0.10563 14.409  0.001 ***
## Residual 122  14.5246 0.89437                  
## Total    123  16.2401 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6.3 Adonis BAC

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_BAC_t[, 1:4429] ~ origin, data = Guayana_spread_BAC_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1    3.729 0.07139 9.3787  0.001 ***
## Residual 122   48.512 0.92861                  
## Total    123   52.241 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6.4 Adonis REL

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_REL_t[, 1:403] ~ origin, data = Guayana_spread_REL_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1   3.6817 0.13367 18.824  0.001 ***
## Residual 122  23.8609 0.86633                  
## Total    123  27.5426 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6.5 Adonis BAC metal

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_BAC_Metal_t[, 1:2933] ~ origin, data = Guayana_spread_BAC_Metal_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1    1.738 0.03368 4.2527  0.001 ***
## Residual 122   49.851 0.96632                  
## Total    123   51.589 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6.6 Adonis BAC biocide

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = Guayana_spread_BAC_Biocide_t[, 1:2000] ~ origin, data = Guayana_spread_BAC_Biocide_t, permutations = 999)
##           Df SumOfSqs      R2      F Pr(>F)    
## origin     1    3.901 0.07349 9.6775  0.001 ***
## Residual 122   49.184 0.92651                  
## Total    123   53.085 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7 Different genes stats

7.1 Quantity of distinct genes by origin

## # A tibble: 3 × 5
## # Groups:   database [3]
##   database statistic p.value method                                  alternative
##   <chr>        <dbl>   <dbl> <chr>                                   <chr>      
## 1 ABR          1446.   0.686 Wilcoxon rank sum test with continuity… two.sided  
## 2 BAC          1132.   0.149 Wilcoxon rank sum test with continuity… two.sided  
## 3 REL          1544.   0.328 Wilcoxon rank sum test with continuity… two.sided

7.2 Mean of distinct genes by origin per person

## # A tibble: 1 × 1
##   media
##   <dbl>
## 1  799.

## # A tibble: 1 × 1
##   media
##   <dbl>
## 1  575.

7.3 Proportion unique genes

## # A tibble: 1 × 1
##   media
##   <dbl>
## 1 25301

## # A tibble: 1 × 1
##   media
##   <dbl>
## 1  575.

7.4 Proportion unique genes per person

7.5 Total unique genes per person

7.6 genes per million

7.7 Rarefaction

7.8 Rarefaction ABR

7.9 Rarefaction REL

7.10 Rarefaction BAC

7.11 Abundance vs Depth

8 Tables of Genes

8.1 Common genes ABR

8.2 not in common genes ABR

8.3 Common genes BAC

8.4 not in common genes BAC

8.5 Common genes BAC metal

8.6 not in common genes BAC metal

8.7 Common genes BAC biocide

8.8 not in common genes BAC biocide

8.9 Common genes REL

8.10 not in common genes REL

8.11 Common genes all

8.12 not in common genes

9 Core genes

9.1 Core genes all

tabla_core <- Tabla_inicial%>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 124)
datatable(tabla_core, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

9.2 Core genes europe

tabla_core_eur <- Tabla_inicial %>% filter(origin=="europa") %>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 29)
datatable(tabla_core_eur, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

9.3 Core genes guayana

tabla_core_guayana <- Tabla_inicial %>% filter(origin=="guayana") %>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 95)
datatable(tabla_core_guayana, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

10 Summary of genes

10.1 summary of gene distribution

ABR_common_genes <- tabla_para_teresa_ABR %>%  group_by(Label) %>% summarise(number = n())
BAC_common_genes<-tabla_para_teresa_BAC %>% distinct(template,Label,TotalFreq) %>%  group_by(Label) %>% summarise(number = n())
REL_common_genes <- tabla_para_teresa_REL %>%  group_by(Label) %>% summarise(number = n())

ABR_notcommon_genes <- anti_tabla_para_teresa_ABR %>%  group_by(Label) %>% summarise(number = n())
BAC_notcommon_genes <- anti_tabla_para_teresa_BAC %>%distinct(template,Label,TotalFreq) %>%  group_by(Label) %>% summarise(number = n())
REL_notcommon_genes <- anti_tabla_para_teresa_REL %>%  group_by(Label) %>% summarise(number = n())

Tipo <- c('ABR','BAC','REL')
aaEur <- c(ABR_notcommon_genes$number[1],BAC_notcommon_genes$number[1],REL_notcommon_genes$number[1])
All <- c( ABR_common_genes$number[1],BAC_common_genes$number[1],REL_common_genes$number[1])
Gui <- c(ABR_notcommon_genes$number[2],BAC_notcommon_genes$number[2],REL_notcommon_genes$number[2])
mix_num_genes <- data.frame( Tipo, aaEur,All,Gui)
mix_num_genes %>% pivot_longer(cols=aaEur:Gui, names_to = "cosa", values_to = "value")%>% 
  ggplot(aes(x = cosa, y = value, fill = Tipo))+geom_col(position = "dodge")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

mix_num_genes %>% pivot_longer(cols=aaEur:Gui, names_to = "cosa", values_to = "value")%>% 
  ggplot(aes(x = Tipo, y = value, fill = cosa))+geom_col(position = "dodge")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1,strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

mix_num_genes %>% pivot_longer(cols=aaEur:Gui, names_to = "cosa", values_to = "value")

## # A tibble: 9 × 3
##   Tipo  cosa  value
##   <chr> <chr> <int>
## 1 ABR   aaEur   109
## 2 ABR   All     156
## 3 ABR   Gui     103
## 4 BAC   aaEur  1429
## 5 BAC   All    4469
## 6 BAC   Gui    5302
## 7 REL   aaEur    72
## 8 REL   All     240
## 9 REL   Gui      91

10.2 distinct genes

Tabla_inicial %>% select(template, origin, database) %>% distinct() %>% group_by(origin, database)%>% summarise(number = n()) %>% 
  ggplot(aes(x = database, y = number, fill = origin))+geom_col(position = "dodge")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1,strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

## `summarise()` has grouped output by 'origin'. You can override using the
## `.groups` argument.

Tabla_inicial %>% select(template, origin, database) %>% distinct() %>% group_by(origin, database)%>% summarise(number = n())

## `summarise()` has grouped output by 'origin'. You can override using the
## `.groups` argument.

## # A tibble: 6 × 3
## # Groups:   origin [2]
##   origin  database number
##   <chr>   <chr>     <int>
## 1 europa  ABR         264
## 2 europa  BAC        5898
## 3 europa  REL         312
## 4 guayana ABR         259
## 5 guayana BAC        9771
## 6 guayana REL         331

10.3 Core genes

## # A tibble: 9 × 3
##   Tipo  cosa  value
##   <chr> <chr> <dbl>
## 1 ABR   aaEur    12
## 2 ABR   All      10
## 3 ABR   Gui      17
## 4 BAC   aaEur     0
## 5 BAC   All       0
## 6 BAC   Gui       0
## 7 REL   aaEur     8
## 8 REL   All       6
## 9 REL   Gui      24

11 Hist of Genes

11.1 hist all genes

11.2 hist all genes LOG

11.3 hist all genes europe_t0

11.4 hist all genes europe_t0 LOG

11.5 hist all genes guayana

11.6 hist all genes guayana LOG

11.7 hist ABR

11.8 hist ABR log

11.9 hist BAC

11.10 hist BAC log

11.11 hist REL

11.12 hist REL log

11.13 hist ABR Guayana

11.14 hist BAC Guayana

11.15 hist REL Guayana

11.16 hist ABR Europe

11.17 hist BAC Europe

11.18 hist REL Europe

12 Resistance genes diversity in population

12.1 ABR diversity

12.2 REL diversity

12.3 BAC diversity

13 Resistance genes diversity in cohort and origin

13.1 ABR diversity in origin

13.2 ABR diversity depth

13.3 ABR diversity percentage

rmtF_1_JQ808129

13.4 REL diversity in origin

13.5 BAC diversity in origin

13.6 BAC metals diversity in origin

13.7 BAC biocide diversity in origin

14 Total freq of Resistance genes

14.1 ABR freq by origin

14.2 depth of BAC by origin

14.3 REL freq by origin

15 Total depth of Resistance genes

15.1 depth of ABR by origin

15.2 depth of BAC by origin

15.3 depth of REL by origin

16 CHI-SQUARE PERCENTAGES

#Amerindians <- Tabla_inicial %>%select(origin) %>%  filter(origin == "guayana")

tabla_america <- Tabla_inicial %>% filter(origin == "guayana") %>% group_by(label) %>% distinct(template,label, .keep_all = TRUE) %>% ungroup %>%
  group_by(template) %>% summarize(suma = n())

tabla_europa <- Tabla_inicial %>% filter(origin == "europa") %>% group_by(label) %>% distinct(template,label, .keep_all = TRUE) %>% ungroup %>% 
  group_by(template) %>% summarize(suma = n())

tabla_chi <- full_join(tabla_europa, tabla_america, by= "template") 
colnames(tabla_chi) <- c("template","europe_P", "america_P" )

tabla_chi[is.na(tabla_chi)] <- 0
tabla_chi_filt <- tabla_chi %>% mutate(europe_N = 29 - europe_P ) %>% mutate(america_N = 95 - america_P )#%>% filter(europe_P > 2 | america_P > 9 )

chi_pvalue <- tabla_chi_filt %>%
  rowwise() %>%
  mutate(
    p.value = fisher.test(matrix(c(europe_P, europe_N, america_P, america_N), nrow = 2))$p.value,
    odds_ratio = fisher.test(matrix(c(europe_P, europe_N, america_P, america_N), nrow = 2))$estimate,
  )

colnames(chi_pvalue) <- c("template","europe_P","america_P","europe_N","america_N","p_value", "odds_ratio")
chi_pvalue <- as.data.frame(chi_pvalue)
chi_pvalue.adj <-chi_pvalue %>% mutate(padj= p.adjust(chi_pvalue$p_value, method = "BH")) #%>% filter(padj<0.05)

chi_pvalue.adj <- chi_pvalue.adj %>% mutate(america_odds = europe_P*america_N) %>% mutate(europe_odds = america_P*europe_N)
chi_pvalue.adj <- chi_pvalue.adj %>% mutate(result1= america_odds/europe_odds) %>% mutate(result2 = europe_odds/america_odds)
chi_pvalue.adj <- chi_pvalue.adj %>% mutate(porcentaje_eur=europe_P/(europe_P+europe_N)) %>% mutate(porcentaje_amer=america_P/(america_P+america_N))
chi_pvalue.adj[is.na(chi_pvalue.adj)] <- 0
chi_pvalue.adj$odds_ratio <- ifelse(chi_pvalue.adj$odds_ratio < 1, -1/chi_pvalue.adj$odds_ratio, chi_pvalue.adj$odds_ratio )
chi_pvalue.adj$direction <- ifelse(chi_pvalue.adj$odds_ratio < 0, 'america', 'europe')

tmpp <- Tabla_inicial %>% select(template, database) %>% distinct()
chi_pvalue.adj <- left_join(chi_pvalue.adj, tmpp, by = "template")


fisher_abr <- chi_pvalue.adj %>% filter(database == "ABR") %>% slice_min(padj, n = 5)
fisher_bac <- chi_pvalue.adj %>% filter(database == "BAC") %>% slice_min(padj, n = 5)
fisher_rel <- chi_pvalue.adj %>% filter(database == "REL") %>% slice_min(padj, n = 5)
fisher_label <- bind_rows(fisher_abr,fisher_bac, fisher_rel) %>% select(template)
fisher_label$label <- fisher_label$template


chi_pvalue.adj.plot <- chi_pvalue.adj
chi_pvalue.adj.plot$fisher <- ifelse(chi_pvalue.adj.plot$padj<0.05, "yes", "no" ) 
chi_pvalue.adj.plot <- left_join(chi_pvalue.adj.plot, fisher_label)
# chi_pvalue.adj.plot$label <- ifelse(chi_pvalue.adj.plot$padj<0.000000000000001, chi_pvalue.adj.plot$template, "" ) 
# chi_pvalue.adj.plot$label <- ifelse(chi_pvalue.adj.plot$padj<0.000000000000001, chi_pvalue.adj.plot$template, "" ) 


chi_pvalue.adj.plot %>% ggplot(aes(porcentaje_eur, porcentaje_amer, color= fisher))+geom_point()+
  geom_text_repel(aes(label=label),hjust = 0, nudge_x = 0.05, max.overlaps = 99)+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

chi_pvalue.adj <-chi_pvalue.adj %>% filter(padj<0.05)

# chi_pvalue.adj$database = "ABR" 
# chi_pvalue.adj$database[grepl("^BAC",chi_pvalue.adj$template)] = "BAC" 
# chi_pvalue.adj$database[grepl("^MOB",chi_pvalue.adj$template)] = "REL" 

# chi_pvalue_rel <- chi_pvalue.adj %>% filter(str_detect(template, "^MOB"))
# chi_pvalue_rel_short <- chi_pvalue.adj %>% filter(str_detect(template, "^MOB"))
# chi_pvalue_rel_short$template <-  sub( "_.*" ," ", chi_pvalue_rel_short$template)
chi_pvalue_rel <- chi_pvalue.adj %>%filter(database == "REl")
chi_pvalue_rel_short <- chi_pvalue.adj %>%filter(database == "REL")
chi_pvalue_rel_short$template <-  sub( "_.*" ," ", chi_pvalue_rel_short$template)


chi_pvalue_abr <- chi_pvalue.adj %>% filter(database == "ABR")
chi_pvalue_abr_short <- chi_pvalue.adj %>%filter(database == "ABR")
#no se puede simplificar sin perder info
# chi_pvalue_abr_short$template <-  sub( ":.*" ," ", chi_pvalue_abr_short$template)
# chi_pvalue_abr_short$template <-  sub( "(_[^_]+)_.*" ,"\\1", chi_pvalue_abr_short$template)
# chi_pvalue_abr_short$template  = sapply(chi_pvalue_abr_short$template , function(x) paste(strsplit(x, "_")[[1]][1:2], collapse = '_'))



chi_pvalue_bac <- chi_pvalue.adj %>% filter(database == "BAC")
# chi_pvalue_bac_short <- chi_pvalue.adj %>% filter(database == "BAC") %>% left_join(Bac_met_db, by = 'template')
# chi_pvalue_bac_short$template <-  sub( "\\|+" ,"-", chi_pvalue_bac_short$template)
# chi_pvalue_bac_short$template <-  sub( "(\\|[^\\|]+)\\|.*" ,"", chi_pvalue_bac_short$template)

16.1 ABR Percentages by padj 2 colors

chi_pvalue_abr_short_longzzz <- chi_pvalue_abr_short %>% 
  select(template,porcentaje_eur,porcentaje_amer) %>%
  melt( id.vars = 'template')

chi_pvalue_abr_short_longzzz$value <- ifelse(chi_pvalue_abr_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_abr_short_longzzz$value*-1, chi_pvalue_abr_short_longzzz$value)

joinedabr <- full_join(chi_pvalue_abr_short_longzzz, chi_pvalue_abr_short, by='template')

joinedabrz <- joinedabr %>% arrange(desc(value))

americcc <- joinedabrz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedabrz %>% filter(variable == 'porcentaje_eur')

ggplot() +
  geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "D") +
  new_scale_fill() +
  geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "C")+
  coord_flip()

16.2 BAC Percentages by padj 2 colors

chi_pvalue_BAC_short_longzzz <- chi_pvalue_bac %>% 
  select(template,porcentaje_eur,porcentaje_amer) %>%
  melt( id.vars = 'template')

chi_pvalue_BAC_short_longzzz$value <- ifelse(chi_pvalue_BAC_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_BAC_short_longzzz$value*-1, chi_pvalue_BAC_short_longzzz$value)

chi_pvalue_rel_short <- chi_pvalue_rel_short %>% arrange(desc(odds_ratio))
joinedBAC <- full_join(chi_pvalue_BAC_short_longzzz, chi_pvalue_bac, by='template')
joinedBACz <- joinedBAC %>% arrange(desc(value))

americcc <- joinedBACz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedBACz %>% filter(variable == 'porcentaje_eur')

ggplot() +
  geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "D") +
  new_scale_fill() +
  geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "C")+
  coord_flip()

16.3 REL Percentages by padj 2 colors

chi_pvalue_REL_short_longzzz <- chi_pvalue_rel_short %>% 
  select(template,porcentaje_eur,porcentaje_amer) %>%
  melt( id.vars = 'template')

chi_pvalue_REL_short_longzzz$value <- ifelse(chi_pvalue_REL_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_REL_short_longzzz$value*-1, chi_pvalue_REL_short_longzzz$value)

chi_pvalue_rel_short <- chi_pvalue_rel_short %>% arrange(desc(odds_ratio))
joinedREL <- full_join(chi_pvalue_REL_short_longzzz, chi_pvalue_rel_short, by='template')

joinedRELz <- joinedREL %>% arrange(desc(value))

americcc <- joinedRELz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedRELz %>% filter(variable == 'porcentaje_eur')


ggplot() +
  geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "D") +
  new_scale_fill() +
  geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
  scale_fill_viridis_c(option = "C")+
  coord_flip()

17 Lefse

17.1 Lefse general

17.2 Lefse ABR

plotLDA(res_origin_ABR_LEFSE,group=c("europa","guayana"),lda=3,pvalue=0.05)

17.3 Lefse BAC

plotLDA(res_origin_BAC_LEFSE,group=c("europa","guayana"),lda=4,pvalue=0.05)

17.4 Lefse REL

plotLDA(res_origin_REL_LEFSE,group=c("europa","guayana"),lda=5,pvalue=0.05)

18 LDA

18.1 GLM ALL

18.2 GLM ABR

18.3 GLM BAC

18.4 GLM REL

19 HEATMAPS

19.1 GLM ALL Heatmap

pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

19.2 GLM ABR Heatmap

pheatmap(log(heat_padj_abr+1), clustering_method = "ward.D2")

19.3 GLM REL Heatmap

pheatmap(log(heat_padj_rel+1), clustering_method = "ward.D2")

19.4 GLM BAC Heatmap

# pheatmap(log(theat_padj_bac+1), clustering_method = "ward.D2")
pheatmap(log(heat_padj_bac+1), clustering_method = "ward.D2")

19.5 FISHER ALL Heatmap

pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

19.6 FISHER ABR Heatmap

pheatmap(log(heat_padj_abr+1), clustering_method = "ward.D2")

Heatmap of the Antimicrobial resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.

19.7 FISHER REL Heatmap

pheatmap(log(heat_padj_rel+1), clustering_method = "ward.D2")

Heatmap of the Relaxases resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.

19.8 FISEHR BAC METAL Heatmap

# pheatmap(log(theat_padj_bac+1), clustering_method = "ward.D2")
pheatmap(log(heat_padj_bac+1), clustering_method = "ward.D2")

Heatmap of the Biocides and metals resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.

19.9 FISEHR BAC Biocide Heatmap

# pheatmap(log(theat_padj_bac+1), clustering_method = "ward.D2")
pheatmap(log(heat_padj_bac+1), clustering_method = "ward.D2")

19.10 ALL Heatmap

# pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

19.11 ABR Heatmap

# pheatmap(log(heat_padj_abr+1), clustering_method = "ward.D2")

19.12 REL Heatmap

# pheatmap(log(heat_padj_rel+1), clustering_method = "ward.D2")

19.13 ALL Heatmap ALL

# pheatmap(log(heat_padj+1), clustering_method = "ward.D2")

20 Metadata

20.1 metadata suma

meta_all<- meta_all  %>% filter(Df > 0)%>%  filter(`Pr(>F)` < 0.05) %>% rownames_to_column(var = "term")
meta_abr<- meta_abr %>% filter(Df > 0) %>%  filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_bac<- meta_bac %>% filter(Df > 0) %>%  filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_rel<- meta_rel %>% filter(Df > 0) %>%  filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_bac_metal<- meta_bac_metal %>% filter(Df > 0) %>%  filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_bac_biocide<- meta_bac_biocide %>% filter(Df > 0) %>%  filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")

meta_all$lib <- "ALL"
meta_abr$lib <- "ABR"
meta_bac$lib <- "BAC"
meta_rel$lib <- "REL"
meta_bac_metal$lib <- "BAC_biocide"   
meta_bac_biocide$lib <- "BAC_biocide"   

meta <- rbind(meta_all, meta_abr,meta_bac,meta_rel, meta_bac_metal, meta_bac_biocide)

metameta <- meta

metameta$label <- ifelse(metameta$`Pr(>F)`<0.01, "0.01",  ifelse(metameta$`Pr(>F)`<0.02, "0.02",  ifelse(metameta$`Pr(>F)`<0.03, "0.03",  ifelse(metameta$`Pr(>F)`<0.04, "0.04",  ifelse(metameta$`Pr(>F)`<0.05, "0.05", "XD")))))

meta %>% ggplot(aes(x=lib, y=term)) + geom_tile(aes(fill=`Pr(>F)`))+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1,strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

metameta %>% ggplot(aes(x=lib, y=term)) + geom_tile(aes(fill=label))+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1,strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

GUAYANA PAPER 95