diver_gral <- diver_gral %>%
pivot_longer(cols=chao_gral:dominance.simpson_BACBiocide, names_to = "cosa", values_to = "value") %>%
separate(cosa, into=c("index", "database2"), sep="_")
shapiro <- diver_gral %>%
group_by(database2, index, origin) %>%
do(tidy(shapiro.test(.$value)))
datatable(shapiro, rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T) )
wilcox <- diver_gral %>%
group_by(database2, index) %>%
do(tidy(wilcox.test(.$value~.$origin)))
datatable(wilcox, rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T) )
student <- diver_gral %>%
group_by(database2, index) %>%
do(tidy(t.test(.$value~.$origin)))
datatable(student, rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T) )
diver_gral %>% filter(index== "chao") %>%
ggplot(aes(x=origin, y = value,fill=origin)) +
geom_violin()+
geom_boxplot(width=.2) +
facet_wrap(.~database2)+
ggtitle ("chao index") +xlab("Cohort origin") +ylab("chao Index")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1, legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
diver_gral %>% filter(index == "chao") %>% group_by(origin, database2)%>% summarise(media = mean(value)) %>%
datatable(rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T) )
## `summarise()` has grouped output by 'origin'. You can override using the
## `.groups` argument.
grouped_ggbetweenstats(
data = diver_gral %>% filter(index== "chao"),
x = origin,
y = value,
grouping.var = database2)
diver_gral %>% filter(index== "dominance.simpson") %>%
ggplot(aes(x=origin, y = value,fill=origin)) +
geom_violin()+
geom_boxplot(width=.2) +
facet_wrap(.~database2)+
ggtitle ("dominance simpson diversity") +xlab("Cohort origin") +ylab("simpson Index")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1, legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
diver_gral %>% filter(index == "dominance.simpson") %>% group_by(origin, database2)%>% summarise(media = mean(value)) %>%
datatable(rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T))
## `summarise()` has grouped output by 'origin'. You can override using the
## `.groups` argument.
grouped_ggbetweenstats(
data = diver_gral %>% filter(index== "dominance.simpson"),
x = origin,
y = value,
grouping.var = database2)
diver_gral %>% filter(index== "evenness.simpson") %>%
ggplot(aes(x=origin, y = value,fill=origin)) +
geom_violin()+
geom_boxplot(width=.2) +
facet_wrap(.~database2)+
ggtitle ("eveness simpson diversity") +xlab("Cohort origin") +ylab("simpson Index")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1, legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
diver_gral %>% filter(index == "evenness.simpson") %>% group_by(origin, database2)%>% summarise(media = mean(value)) %>%
datatable(rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T))
## `summarise()` has grouped output by 'origin'. You can override using the
## `.groups` argument.
grouped_ggbetweenstats(
data = diver_gral %>% filter(index== "evenness.simpson"),
x = origin,
y = value,
grouping.var = database2)
as.data.frame(PCA$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
ggtitle ("Principal Component Analysis") +xlab("PC1 47.29%") +ylab("PC2 21.43%")
as.data.frame(PCA_ABR$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
ggtitle ("Principal Component Analysis") +xlab("PC1 70.76%") +ylab("PC2 6.89%")
as.data.frame(PCA_BAC$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
ggtitle ("Principal Component Analysis") +xlab("PC1 51.78%") +ylab("PC2 10.88%")
as.data.frame(PCA_REL$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
ggtitle ("Principal Component Analysis") +xlab("PC1 75.26%") +ylab("PC2 12.94%")
as.data.frame(PCA_BAC_Metal$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
ggtitle ("Principal Component Analysis") +xlab("PC1 50.87%") +ylab("PC2 10.29%")
as.data.frame(PCA_BAC_Biocide$x[,1:2]) %>% rownames_to_column("label") %>% inner_join(Tabla_inicial) %>%
select(label,PC1,PC2,origin) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = origin)) + geom_point() +
ggtitle ("Principal Component Analysis") +xlab("PC1 53.32%") +ylab("PC2 10.54%")
Guayana_tsne_spearman <- as.matrix(Guayana_tsne_spearman)
Guayana_tsne_spearman_REL <- as.matrix(Guayana_tsne_spearman_REL)
Guayana_tsne_spearman_ABR <- as.matrix(Guayana_tsne_spearman_ABR)
Guayana_tsne_spearman_BAC <- as.matrix(Guayana_tsne_spearman_BAC)
Guayana_tsne_spearman_BAC_Metal = get_dist(Guayana_spread_BAC_Metal, method = "spearman")
Guayana_tsne_spearman_BAC_Biocide = get_dist(Guayana_spread_BAC_Biocide, method = "spearman")
umap_plot <- umap(Guayana_tsne_spearman, input="dist")
umap_plot_REL <- umap(Guayana_tsne_spearman_REL, input="dist")
umap_plot_ABR <- umap(Guayana_tsne_spearman_ABR, input="dist")
umap_plot_BAC <- umap(Guayana_tsne_spearman_BAC, input="dist")
umap_plot_BAC_Metal <- umap(Guayana_spread_BAC_Metal, input="dist")
umap_plot_BAC_Biocide <- umap(Guayana_spread_BAC_Biocide, input="dist")
umap_plot <- as.data.frame(umap_plot$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_REL <- as.data.frame(umap_plot_REL$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_ABR <- as.data.frame(umap_plot_ABR$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC <- as.data.frame(umap_plot_BAC$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC_Metal <- as.data.frame(umap_plot_BAC_Metal$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_BAC_Biocide <- as.data.frame(umap_plot_BAC_Biocide$layout) %>% rownames_to_column("label") %>% inner_join(., Tabla_inicial)
umap_plot_ABR %>%
select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
ggtitle ("UMAP ABR")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1, legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 0, hjust=1))
umap_plot_BAC %>%
select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
ggtitle ("UMAP BAC")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1, legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
umap_plot_REL %>%
select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
ggtitle ("UMAP REL")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1, legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
umap_plot_BAC_Metal %>%
select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
ggtitle ("UMAP BAC Metal")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1, legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
umap_plot_BAC_Biocide %>%
select(label,V1,V2,origin) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = origin)) + geom_point()+
ggtitle ("UMAP BAC biocide")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1, legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
ggplot(tsne_spearman_30_plot) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE")
ggplot(tsne_spearman_30_plot_ABR) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE ABR")
ggplot(tsne_spearman_30_plot_BAC) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE BAC")
ggplot(tsne_spearman_30_plot_REL) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE REL")
ggplot(tsne_spearman_30_plot_BAC_Metal) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE metal")
ggplot(tsne_spearman_30_plot_BAC_Biocide) + geom_point(aes(x=x,y=y,color=origin)) + ggtitle("T-SNE biocide")
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = Guayana_spread_t[, 1:5199] ~ origin, data = Guayana_spread_t, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## origin 1 2.0739 0.09944 13.471 0.001 ***
## Residual 122 18.7817 0.90056
## Total 123 20.8556 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = Guayana_spread_ABR_t[, 1:367] ~ origin, data = Guayana_spread_ABR_t, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## origin 1 1.7652 0.12826 17.95 0.001 ***
## Residual 122 11.9974 0.87174
## Total 123 13.7626 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = Guayana_spread_BAC_t[, 1:4429] ~ origin, data = Guayana_spread_BAC_t, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## origin 1 1.781 0.03165 3.9879 0.001 ***
## Residual 122 54.480 0.96835
## Total 123 56.261 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = Guayana_spread_REL_t[, 1:403] ~ origin, data = Guayana_spread_REL_t, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## origin 1 3.8464 0.14549 20.771 0.001 ***
## Residual 122 22.5916 0.85451
## Total 123 26.4380 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = Guayana_spread_BAC_Metal_t[, 1:2933] ~ origin, data = Guayana_spread_BAC_Metal_t, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## origin 1 1.139 0.02044 2.5463 0.001 ***
## Residual 122 54.580 0.97956
## Total 123 55.720 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = Guayana_spread_BAC_Biocide_t[, 1:2000] ~ origin, data = Guayana_spread_BAC_Biocide_t, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## origin 1 1.952 0.0353 4.391 0.001 ***
## Residual 120 53.341 0.9647
## Total 121 55.293 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 3 × 5
## # Groups: database [3]
## database statistic p.value method alternative
## <chr> <dbl> <dbl> <chr> <chr>
## 1 ABR 1446. 0.686 Wilcoxon rank sum test with continuity… two.sided
## 2 BAC 1222. 0.359 Wilcoxon rank sum test with continuity… two.sided
## 3 REL 1544. 0.328 Wilcoxon rank sum test with continuity… two.sided
## # A tibble: 1 × 1
## media
## <dbl>
## 1 309.
## # A tibble: 1 × 1
## media
## <dbl>
## 1 266.
## # A tibble: 1 × 1
## media
## <dbl>
## 1 9783.
## # A tibble: 1 × 1
## media
## <dbl>
## 1 266.
tabla_core <- Tabla_inicial%>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 124)
datatable(tabla_core, rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T) )
tabla_core_eur <- Tabla_inicial %>% filter(origin=="europa") %>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 29)
datatable(tabla_core_eur, rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T) )
tabla_core_guayana <- Tabla_inicial %>% filter(origin=="guayana") %>% group_by(template) %>% summarise(TotalFreq = n())%>% ungroup() %>% filter(TotalFreq == 95)
datatable(tabla_core_guayana, rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T) )
ABR_common_genes <- tabla_para_teresa_ABR %>% group_by(Label) %>% summarise(number = n())
BAC_common_genes<-tabla_para_teresa_BAC %>% distinct(template,Label,TotalFreq) %>% group_by(Label) %>% summarise(number = n())
REL_common_genes <- tabla_para_teresa_REL %>% group_by(Label) %>% summarise(number = n())
ABR_notcommon_genes <- anti_tabla_para_teresa_ABR %>% group_by(Label) %>% summarise(number = n())
BAC_notcommon_genes <- anti_tabla_para_teresa_BAC %>%distinct(template,Label,TotalFreq) %>% group_by(Label) %>% summarise(number = n())
REL_notcommon_genes <- anti_tabla_para_teresa_REL %>% group_by(Label) %>% summarise(number = n())
Tipo <- c('ABR','BAC','REL')
aaEur <- c(ABR_notcommon_genes$number[1],BAC_notcommon_genes$number[1],REL_notcommon_genes$number[1])
All <- c( ABR_common_genes$number[1],BAC_common_genes$number[1],REL_common_genes$number[1])
Gui <- c(ABR_notcommon_genes$number[2],BAC_notcommon_genes$number[2],REL_notcommon_genes$number[2])
mix_num_genes <- data.frame( Tipo, aaEur,All,Gui)
mix_num_genes %>% pivot_longer(cols=aaEur:Gui, names_to = "cosa", values_to = "value")%>%
ggplot(aes(x = cosa, y = value, fill = Tipo))+geom_col(position = "dodge")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1, legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
mix_num_genes %>% pivot_longer(cols=aaEur:Gui, names_to = "cosa", values_to = "value")%>%
ggplot(aes(x = Tipo, y = value, fill = cosa))+geom_col(position = "dodge")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1,strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
mix_num_genes %>% pivot_longer(cols=aaEur:Gui, names_to = "cosa", values_to = "value")
## # A tibble: 9 × 3
## Tipo cosa value
## <chr> <chr> <int>
## 1 ABR aaEur 109
## 2 ABR All 156
## 3 ABR Gui 103
## 4 BAC aaEur 759
## 5 BAC All 1280
## 6 BAC Gui 2390
## 7 REL aaEur 72
## 8 REL All 240
## 9 REL Gui 91
Tabla_inicial %>% select(template, origin, database) %>% distinct() %>% group_by(origin, database)%>% summarise(number = n()) %>%
ggplot(aes(x = database, y = number, fill = origin))+geom_col(position = "dodge")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1,strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
## `summarise()` has grouped output by 'origin'. You can override using the
## `.groups` argument.
Tabla_inicial %>% select(template, origin, database) %>% distinct() %>% group_by(origin, database)%>% summarise(number = n())
## `summarise()` has grouped output by 'origin'. You can override using the
## `.groups` argument.
## # A tibble: 6 × 3
## # Groups: origin [2]
## origin database number
## <chr> <chr> <int>
## 1 europa ABR 264
## 2 europa BAC 2039
## 3 europa REL 312
## 4 guayana ABR 259
## 5 guayana BAC 3670
## 6 guayana REL 331
## # A tibble: 9 × 3
## Tipo cosa value
## <chr> <chr> <dbl>
## 1 ABR aaEur 12
## 2 ABR All 10
## 3 ABR Gui 17
## 4 BAC aaEur 0
## 5 BAC All 0
## 6 BAC Gui 0
## 7 REL aaEur 8
## 8 REL All 6
## 9 REL Gui 24
rmtF_1_JQ808129
#Amerindians <- Tabla_inicial %>%select(origin) %>% filter(origin == "guayana")
tabla_america <- Tabla_inicial %>% filter(origin == "guayana") %>% group_by(label) %>% distinct(template,label, .keep_all = TRUE) %>% ungroup %>%
group_by(template) %>% summarize(suma = n())
tabla_europa <- Tabla_inicial %>% filter(origin == "europa") %>% group_by(label) %>% distinct(template,label, .keep_all = TRUE) %>% ungroup %>%
group_by(template) %>% summarize(suma = n())
tabla_chi <- full_join(tabla_europa, tabla_america, by= "template")
colnames(tabla_chi) <- c("template","europe_P", "america_P" )
tabla_chi[is.na(tabla_chi)] <- 0
tabla_chi_filt <- tabla_chi %>% mutate(europe_N = 29 - europe_P ) %>% mutate(america_N = 95 - america_P )#%>% filter(europe_P > 2 | america_P > 9 )
chi_pvalue <- tabla_chi_filt %>%
rowwise() %>%
mutate(
p.value = fisher.test(matrix(c(europe_P, europe_N, america_P, america_N), nrow = 2))$p.value,
odds_ratio = fisher.test(matrix(c(europe_P, europe_N, america_P, america_N), nrow = 2))$estimate,
)
colnames(chi_pvalue) <- c("template","europe_P","america_P","europe_N","america_N","p_value", "odds_ratio")
chi_pvalue <- as.data.frame(chi_pvalue)
chi_pvalue.adj <-chi_pvalue %>% mutate(padj= p.adjust(chi_pvalue$p_value, method = "BH")) #%>% filter(padj<0.05)
chi_pvalue.adj <- chi_pvalue.adj %>% mutate(america_odds = europe_P*america_N) %>% mutate(europe_odds = america_P*europe_N)
chi_pvalue.adj <- chi_pvalue.adj %>% mutate(result1= america_odds/europe_odds) %>% mutate(result2 = europe_odds/america_odds)
chi_pvalue.adj <- chi_pvalue.adj %>% mutate(porcentaje_eur=europe_P/(europe_P+europe_N)) %>% mutate(porcentaje_amer=america_P/(america_P+america_N))
chi_pvalue.adj[is.na(chi_pvalue.adj)] <- 0
chi_pvalue.adj$odds_ratio <- ifelse(chi_pvalue.adj$odds_ratio < 1, -1/chi_pvalue.adj$odds_ratio, chi_pvalue.adj$odds_ratio )
chi_pvalue.adj$direction <- ifelse(chi_pvalue.adj$odds_ratio < 0, 'america', 'europe')
tmpp <- Tabla_inicial %>% select(template, database) %>% distinct()
chi_pvalue.adj <- left_join(chi_pvalue.adj, tmpp, by = "template")
fisher_abr <- chi_pvalue.adj %>% filter(database == "ABR") %>% slice_min(padj, n = 5)
fisher_bac <- chi_pvalue.adj %>% filter(database == "BAC") %>% slice_min(padj, n = 5)
fisher_rel <- chi_pvalue.adj %>% filter(database == "REL") %>% slice_min(padj, n = 5)
fisher_label <- bind_rows(fisher_abr,fisher_bac, fisher_rel) %>% select(template)
fisher_label$label <- fisher_label$template
chi_pvalue.adj.plot <- chi_pvalue.adj
chi_pvalue.adj.plot$fisher <- ifelse(chi_pvalue.adj.plot$padj<0.05, "yes", "no" )
chi_pvalue.adj.plot <- left_join(chi_pvalue.adj.plot, fisher_label)
# chi_pvalue.adj.plot$label <- ifelse(chi_pvalue.adj.plot$padj<0.000000000000001, chi_pvalue.adj.plot$template, "" )
# chi_pvalue.adj.plot$label <- ifelse(chi_pvalue.adj.plot$padj<0.000000000000001, chi_pvalue.adj.plot$template, "" )
chi_pvalue.adj.plot %>% ggplot(aes(porcentaje_eur, porcentaje_amer, color= fisher))+geom_point()+
geom_text_repel(aes(label=label),hjust = 0, nudge_x = 0.05, max.overlaps = 99)+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1, legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
chi_pvalue.adj <-chi_pvalue.adj %>% filter(padj<0.05)
# chi_pvalue.adj$database = "ABR"
# chi_pvalue.adj$database[grepl("^BAC",chi_pvalue.adj$template)] = "BAC"
# chi_pvalue.adj$database[grepl("^MOB",chi_pvalue.adj$template)] = "REL"
# chi_pvalue_rel <- chi_pvalue.adj %>% filter(str_detect(template, "^MOB"))
# chi_pvalue_rel_short <- chi_pvalue.adj %>% filter(str_detect(template, "^MOB"))
# chi_pvalue_rel_short$template <- sub( "_.*" ," ", chi_pvalue_rel_short$template)
chi_pvalue_rel <- chi_pvalue.adj %>%filter(database == "REl")
chi_pvalue_rel_short <- chi_pvalue.adj %>%filter(database == "REL")
chi_pvalue_rel_short$template <- sub( "_.*" ," ", chi_pvalue_rel_short$template)
chi_pvalue_abr <- chi_pvalue.adj %>% filter(database == "ABR")
chi_pvalue_abr_short <- chi_pvalue.adj %>%filter(database == "ABR")
#no se puede simplificar sin perder info
# chi_pvalue_abr_short$template <- sub( ":.*" ," ", chi_pvalue_abr_short$template)
# chi_pvalue_abr_short$template <- sub( "(_[^_]+)_.*" ,"\\1", chi_pvalue_abr_short$template)
# chi_pvalue_abr_short$template = sapply(chi_pvalue_abr_short$template , function(x) paste(strsplit(x, "_")[[1]][1:2], collapse = '_'))
chi_pvalue_bac <- chi_pvalue.adj %>% filter(database == "BAC")
# chi_pvalue_bac_short <- chi_pvalue.adj %>% filter(database == "BAC") %>% left_join(Bac_met_db, by = 'template')
# chi_pvalue_bac_short$template <- sub( "\\|+" ,"-", chi_pvalue_bac_short$template)
# chi_pvalue_bac_short$template <- sub( "(\\|[^\\|]+)\\|.*" ,"", chi_pvalue_bac_short$template)
chi_pvalue_abr_short_longzzz <- chi_pvalue_abr_short %>%
select(template,porcentaje_eur,porcentaje_amer) %>%
melt( id.vars = 'template')
chi_pvalue_abr_short_longzzz$value <- ifelse(chi_pvalue_abr_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_abr_short_longzzz$value*-1, chi_pvalue_abr_short_longzzz$value)
joinedabr <- full_join(chi_pvalue_abr_short_longzzz, chi_pvalue_abr_short, by='template')
joinedabrz <- joinedabr %>% arrange(desc(value))
americcc <- joinedabrz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedabrz %>% filter(variable == 'porcentaje_eur')
ggplot() +
geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
scale_fill_viridis_c(option = "D") +
new_scale_fill() +
geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
scale_fill_viridis_c(option = "C")+
coord_flip()
chi_pvalue_BAC_short_longzzz <- chi_pvalue_bac %>%
select(template,porcentaje_eur,porcentaje_amer) %>%
melt( id.vars = 'template')
chi_pvalue_BAC_short_longzzz$value <- ifelse(chi_pvalue_BAC_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_BAC_short_longzzz$value*-1, chi_pvalue_BAC_short_longzzz$value)
chi_pvalue_rel_short <- chi_pvalue_rel_short %>% arrange(desc(odds_ratio))
joinedBAC <- full_join(chi_pvalue_BAC_short_longzzz, chi_pvalue_bac, by='template')
joinedBACz <- joinedBAC %>% arrange(desc(value))
americcc <- joinedBACz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedBACz %>% filter(variable == 'porcentaje_eur')
ggplot() +
geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
scale_fill_viridis_c(option = "D") +
new_scale_fill() +
geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
scale_fill_viridis_c(option = "C")+
coord_flip()
chi_pvalue_REL_short_longzzz <- chi_pvalue_rel_short %>%
select(template,porcentaje_eur,porcentaje_amer) %>%
melt( id.vars = 'template')
chi_pvalue_REL_short_longzzz$value <- ifelse(chi_pvalue_REL_short_longzzz$variable == 'porcentaje_eur', chi_pvalue_REL_short_longzzz$value*-1, chi_pvalue_REL_short_longzzz$value)
chi_pvalue_rel_short <- chi_pvalue_rel_short %>% arrange(desc(odds_ratio))
joinedREL <- full_join(chi_pvalue_REL_short_longzzz, chi_pvalue_rel_short, by='template')
joinedRELz <- joinedREL %>% arrange(desc(value))
americcc <- joinedRELz %>% filter(variable == 'porcentaje_amer')
europpp <- joinedRELz %>% filter(variable == 'porcentaje_eur')
ggplot() +
geom_col(data=americcc, aes(reorder(template, -padj), value, fill=padj)) +
scale_fill_viridis_c(option = "D") +
new_scale_fill() +
geom_col(data= europpp, aes(reorder(template, -padj), value, fill=padj)) +
scale_fill_viridis_c(option = "C")+
coord_flip()
plotLDA(res_origin_ABR_LEFSE,group=c("europa","guayana"),lda=3,pvalue=0.05)
plotLDA(res_origin_BAC_LEFSE,group=c("europa","guayana"),lda=4,pvalue=0.05)
plotLDA(res_origin_REL_LEFSE,group=c("europa","guayana"),lda=5,pvalue=0.05)
pheatmap(log(heat_padj+1), clustering_method = "ward.D2")
pheatmap(log(heat_padj_abr+1), clustering_method = "ward.D2")
pheatmap(log(heat_padj_rel+1), clustering_method = "ward.D2")
# pheatmap(log(theat_padj_bac+1), clustering_method = "ward.D2")
pheatmap(log(heat_padj_bac+1), clustering_method = "ward.D2")
pheatmap(log(heat_padj+1), clustering_method = "ward.D2")
pheatmap(log(heat_padj_abr+1), clustering_method = "ward.D2")
Heatmap of the Antimicrobial resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.
pheatmap(log(heat_padj_rel+1), clustering_method = "ward.D2")
Heatmap of the Relaxases resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.
# pheatmap(log(theat_padj_bac+1), clustering_method = "ward.D2")
pheatmap(log(heat_padj_bac+1), clustering_method = "ward.D2")
Heatmap of the Biocides and metals resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.
# pheatmap(log(theat_padj_bac+1), clustering_method = "ward.D2")
pheatmap(log(heat_padj_bac+1), clustering_method = "ward.D2")
# pheatmap(log(heat_padj+1), clustering_method = "ward.D2")
# pheatmap(log(heat_padj_abr+1), clustering_method = "ward.D2")
Heatmap of the Antimicrobial resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.
# pheatmap(log(heat_padj_rel+1), clustering_method = "ward.D2")
Heatmap of the Relaxases resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.
Heatmap of the Biocides and metals resistance genes. The genes that appear on the y axis and the individuals of the x axis were previously selected From an ANOVA test with p-value lower than 0,05 in. Red indicates the highest value of appearance of the gene in the population and blue the contrary.
# pheatmap(log(heat_padj+1), clustering_method = "ward.D2")
meta_all<- meta_all %>% filter(Df > 0)%>% filter(`Pr(>F)` < 0.05) %>% rownames_to_column(var = "term")
meta_abr<- meta_abr %>% filter(Df > 0) %>% filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_bac<- meta_bac %>% filter(Df > 0) %>% filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_rel<- meta_rel %>% filter(Df > 0) %>% filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_bac_metal<- meta_bac_metal %>% filter(Df > 0) %>% filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_bac_biocide<- meta_bac_biocide %>% filter(Df > 0) %>% filter(`Pr(>F)` < 0.05)%>% rownames_to_column(var = "term")
meta_all$lib <- "ALL"
meta_abr$lib <- "ABR"
meta_bac$lib <- "BAC"
meta_rel$lib <- "REL"
meta_bac_metal$lib <- "BAC_biocide"
# meta_bac_biocide$lib <- "BAC_biocide"
meta <- rbind(meta_all, meta_abr,meta_bac,meta_rel, meta_bac_metal, meta_bac_biocide)
metameta <- meta
metameta$label <- ifelse(metameta$`Pr(>F)`<0.01, "0.01", ifelse(metameta$`Pr(>F)`<0.02, "0.02", ifelse(metameta$`Pr(>F)`<0.03, "0.03", ifelse(metameta$`Pr(>F)`<0.04, "0.04", ifelse(metameta$`Pr(>F)`<0.05, "0.05", "XD")))))
meta %>% ggplot(aes(x=lib, y=term)) + geom_tile(aes(fill=`Pr(>F)`))+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1,strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))
metameta %>% ggplot(aes(x=lib, y=term)) + geom_tile(aes(fill=label))+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1,strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))