Load Rarefaction
## [1] 366155
Proporcion por
semana
Tabla_inicial_info %>% filter(Tipo == 1) %>% group_by(Semana, database) %>% summarise(Different_genes_all = n()) %>% ggplot(aes(x=Semana,y=Different_genes_all,fill=database)) + geom_col(position = "fill")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1), axis.title.x = element_blank(), axis.title.y = element_blank() )
## `summarise()` has grouped output by 'Semana'. You can override using the
## `.groups` argument.

diversidad
tablas
Tabla_gral <- Tabla_inicial %>% select(template, depth, dataset) %>% spread(template,depth, fill = 0) %>%
column_to_rownames("dataset") %>% t(.) %>% otu_table(taxa_are_rows = T)
Tabla_ABR <- Tabla_inicial %>% filter (database=="ABR") %>% select(template, depth, dataset) %>% spread(template,depth, fill = 0) %>%
column_to_rownames("dataset") %>% t(.) %>% otu_table(taxa_are_rows = T)
Tabla_BAC <- Tabla_inicial %>% filter (database=="BAC") %>% select(template, depth, dataset) %>% spread(template,depth, fill = 0) %>%
column_to_rownames("dataset") %>% t(.) %>% otu_table(taxa_are_rows = T)
Tabla_REL <- Tabla_inicial %>% filter (database=="REL") %>% select(template, depth, dataset) %>% spread(template,depth, fill = 0) %>%
column_to_rownames("dataset") %>% t(.) %>% otu_table(taxa_are_rows = T)
diver_gral <- alpha(Tabla_gral, index = c("Shannon", "Chao1", "Simpson")) %>%
rename(shannon_gral= "diversity_shannon", chao_gral = 'chao1', dominance.simpson_gral = 'dominance_simpson', evenness.simpson_gral = 'evenness_simpson' ) %>%
rownames_to_column(.,var="dataset")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
diver_gral <- alpha(Tabla_ABR, index = c("Shannon", "Chao1", "Simpson")) %>%
rename(shannon_ABR= "diversity_shannon", chao_ABR = 'chao1', dominance.simpson_ABR = 'dominance_simpson', evenness.simpson_ABR = 'evenness_simpson' ) %>%
rownames_to_column(.,var="dataset") %>% left_join(diver_gral,., by="dataset")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
diver_gral <- alpha(Tabla_BAC, index = c("Shannon", "Chao1", "Simpson")) %>%
rename(shannon_BAC= "diversity_shannon", chao_BAC = 'chao1', dominance.simpson_BAC = 'dominance_simpson', evenness.simpson_BAC = 'evenness_simpson' ) %>%
rownames_to_column(.,var="dataset") %>% left_join(diver_gral,., by="dataset")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
diver_gral <- alpha(Tabla_REL, index = c("Shannon", "Chao1", "Simpson")) %>%
rename(shannon_REL= "diversity_shannon", chao_REL = 'chao1', dominance.simpson_REL = 'dominance_simpson', evenness.simpson_REL = 'evenness_simpson' ) %>%
rownames_to_column(.,var="dataset") %>% left_join(diver_gral,., by="dataset")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
tmp_treat_notreat <- Tabla_inicial %>% select(dataset, Tipo) %>% distinct()
diver_gral <- left_join(diver_gral, tmp_treat_notreat)
## Joining with `by = join_by(dataset)`
diver_gral <- diver_gral %>%
pivot_longer(cols=chao_gral:dominance.simpson_REL, names_to = "cosa", values_to = "value") %>%
separate(cosa, into=c("index", "database2"), sep="_")
shapiro <- diver_gral %>%
group_by(database2, index, Tipo) %>%
do(tidy(shapiro.test(.$value)))
datatable(shapiro, rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T) )
wilcox <- diver_gral %>%
group_by(database2, index) %>%
do(tidy(wilcox.test(.$value~.$Tipo)))
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
datatable(wilcox, rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T) )
student <- diver_gral %>%
group_by(database2, index) %>%
do(tidy(t.test(.$value~.$Tipo)))
datatable(student, rownames = FALSE, filter="top", options = list(pageLength = 30, scrollX=T, scrollY=T) )
violin
diver_gral %>% filter(index== "shannon") %>%
ggplot(aes(x=Tipo, y = value,fill=Tipo)) +
geom_violin()+
geom_boxplot(width=.2) +
facet_wrap(.~database2)+
ggtitle ("Shannon diversity") +xlab("Cohort origin") +ylab("Shannon Index")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1, legend.position = "none",strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=1))

PCA
Guayana_spread<- Tabla_inicial %>% select(template,depth,dataset) %>% group_by(template,dataset) %>%
summarise(depth = max(depth)) %>% distinct(template, dataset, .keep_all = T) %>% spread(template,depth, fill = 0)%>% column_to_rownames(var = "dataset")
## `summarise()` has grouped output by 'template'. You can override using the
## `.groups` argument.
Guayana_spread_BAC<- Tabla_inicial %>% filter(database=="BAC") %>% select(template,depth,dataset) %>% group_by(template,dataset) %>%
summarise(depth = max(depth)) %>% distinct(template, dataset, .keep_all = T) %>% spread(template,depth, fill = 0)%>%
column_to_rownames(var = "dataset")
## `summarise()` has grouped output by 'template'. You can override using the
## `.groups` argument.
Guayana_spread_ABR<- Tabla_inicial %>% filter(database=="ABR") %>% select(template,depth,dataset) %>% group_by(template,dataset) %>%
summarise(depth = max(depth)) %>% distinct(template, dataset, .keep_all = T) %>% spread(template,depth, fill = 0)%>%
column_to_rownames(var = "dataset")
## `summarise()` has grouped output by 'template'. You can override using the
## `.groups` argument.
Guayana_spread_REL<- Tabla_inicial %>% filter(database=="REL") %>% select(template,depth,dataset) %>% group_by(template,dataset)%>% summarise(depth = max(depth)) %>% distinct(template, dataset, .keep_all = T) %>% spread(template,depth, fill = 0)%>%
column_to_rownames(var = "dataset")
## `summarise()` has grouped output by 'template'. You can override using the
## `.groups` argument.
Guayana_tsne_spearman = get_dist(Guayana_spread, method = "spearman")
Guayana_tsne_spearman_BAC = get_dist(Guayana_spread_BAC, method = "spearman")
Guayana_tsne_spearman_REL = get_dist(Guayana_spread_REL, method = "spearman")
Guayana_tsne_spearman_ABR = get_dist(Guayana_spread_ABR, method = "spearman")
PCA <- prcomp(Guayana_tsne_spearman)
PCA_BAC <- prcomp(Guayana_tsne_spearman_BAC)
PCA_ABR <- prcomp(Guayana_tsne_spearman_ABR)
PCA_REL <- prcomp(Guayana_tsne_spearman_REL)
PCA 0/1
as.data.frame(PCA$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
select(dataset,PC1,PC2,Tipo) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = Tipo)) + geom_point() +
ggtitle ("Principal Component Analysis") +xlab("PC1 66%") +ylab("PC2 9%")
## Joining with `by = join_by(dataset)`

PCA 0/1 ABR
as.data.frame(PCA_ABR$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
select(dataset,PC1,PC2,Tipo) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = Tipo)) + geom_point() +
ggtitle ("Principal Component Analysis") +xlab("PC1 22.8%") +ylab("PC2 18%")
## Joining with `by = join_by(dataset)`

PCA 0/1 BAC
as.data.frame(PCA_BAC$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
select(dataset,PC1,PC2,Tipo) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = Tipo)) + geom_point() +
ggtitle ("Principal Component Analysis") +xlab("PC1 22.8%") +ylab("PC2 18%")
## Joining with `by = join_by(dataset)`

PCA 0/1 REL
as.data.frame(PCA_REL$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
select(dataset,PC1,PC2,Tipo) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = Tipo)) + geom_point() +
ggtitle ("Principal Component Analysis") +xlab("PC1 22.8%") +ylab("PC2 18%")
## Joining with `by = join_by(dataset)`

PCA Semana
as.data.frame(PCA$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
select(dataset,PC1,PC2,Semana) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = as.numeric(Semana))) + geom_point() +
scale_color_gradient(low='yellow', high = 'black')+
ggtitle ("Principal Component Analysis") +xlab("PC1 66%") +ylab("PC2 9%")
## Joining with `by = join_by(dataset)`

PCA Semana ABR
as.data.frame(PCA_ABR$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
select(dataset,PC1,PC2,Semana) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = as.numeric(Semana))) + geom_point() +
scale_color_gradient(low='yellow', high = 'black')+
ggtitle ("Principal Component Analysis") +xlab("PC1 66%") +ylab("PC2 9%")
## Joining with `by = join_by(dataset)`

PCA Semana BAC
as.data.frame(PCA_BAC$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
select(dataset,PC1,PC2,Semana) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = as.numeric(Semana))) + geom_point() +
scale_color_gradient(low='yellow', high = 'black')+
ggtitle ("Principal Component Analysis") +xlab("PC1 66%") +ylab("PC2 9%")
## Joining with `by = join_by(dataset)`

PCA Semana REL
as.data.frame(PCA_REL$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
select(dataset,PC1,PC2,Semana) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = as.numeric(Semana))) + geom_point() +
scale_color_gradient(low='yellow', high = 'black')+
ggtitle ("Principal Component Analysis") +xlab("PC1 66%") +ylab("PC2 9%")
## Joining with `by = join_by(dataset)`

UMAP
Guayana_tsne_spearman <- as.matrix(Guayana_tsne_spearman)
Guayana_tsne_spearman_REL <- as.matrix(Guayana_tsne_spearman_REL)
Guayana_tsne_spearman_ABR <- as.matrix(Guayana_tsne_spearman_ABR)
Guayana_tsne_spearman_BAC <- as.matrix(Guayana_tsne_spearman_BAC)
umap_plot <- umap(Guayana_tsne_spearman, input="dist")
umap_plot_REL <- umap(Guayana_tsne_spearman_REL)
umap_plot_ABR <- umap(Guayana_tsne_spearman_ABR)
umap_plot_BAC <- umap(Guayana_tsne_spearman_BAC)
umap_plot <- as.data.frame(umap_plot$layout) %>% rownames_to_column("dataset") %>% inner_join(., Tabla_inicial)
## Joining with `by = join_by(dataset)`
umap_plot_REL <- as.data.frame(umap_plot_REL$layout) %>% rownames_to_column("dataset") %>% inner_join(., Tabla_inicial)
## Joining with `by = join_by(dataset)`
umap_plot_ABR <- as.data.frame(umap_plot_ABR$layout) %>% rownames_to_column("dataset") %>% inner_join(., Tabla_inicial)
## Joining with `by = join_by(dataset)`
umap_plot_BAC <- as.data.frame(umap_plot_BAC$layout) %>% rownames_to_column("dataset") %>% inner_join(., Tabla_inicial)
## Joining with `by = join_by(dataset)`
UMAP semana
umap_plot %>%
select(dataset,V1,V2,Semana) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = as.numeric(Semana))) + geom_point()+
scale_color_gradient(low='blue', high = 'red')+
ggtitle ("UMAP")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1,strip.background = element_blank(),
axis.text.x = element_text(angle = 0, hjust=1))

UMAP semana ABR
umap_plot_ABR %>%
select(dataset,V1,V2,Semana) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = as.numeric(Semana))) + geom_point()+
scale_color_gradient(low='yellow', high = 'black')+
ggtitle ("UMAP")

UMAP semana BAC
umap_plot_BAC %>%
select(dataset,V1,V2,Semana) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = as.numeric(Semana))) + geom_point()+
scale_color_gradient(low='yellow', high = 'black')+
ggtitle ("UMAP")

UMAP semana REL
umap_plot_REL %>%
select(dataset,V1,V2,Semana) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = as.numeric(Semana))) + geom_point()+
scale_color_gradient(low='yellow', high = 'black')+
ggtitle ("UMAP")

UMAP 0/1
umap_plot %>%
select(dataset,V1,V2,Tipo) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = Tipo)) + geom_point()+
ggtitle ("UMAP")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1,strip.background = element_blank(),
axis.text.x = element_text(angle = 0, hjust=1))

UMAP 0/1 ABR
umap_plot_ABR %>%
select(dataset,V1,V2,Tipo) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = Tipo)) + geom_point()+
ggtitle ("UMAP")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1,strip.background = element_blank(),
axis.text.x = element_text(angle = 0, hjust=1))

UMAP 0/1 BAC
umap_plot_BAC %>%
select(dataset,V1,V2,Tipo) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = Tipo)) + geom_point()+
ggtitle ("UMAP")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1,strip.background = element_blank(),
axis.text.x = element_text(angle = 0, hjust=1))

UMAP 0/1 REL
umap_plot_REL %>%
select(dataset,V1,V2,Tipo) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = Tipo)) + geom_point()+
ggtitle ("UMAP")+
theme_classic()+
theme(panel.background = element_blank(), panel.grid = element_blank(),
aspect.ratio = 1,strip.background = element_blank(),
axis.text.x = element_text(angle = 0, hjust=1))

STATS
Quantity of distinct
genes by origin
Tabla_inicial %>% group_by(database,Tipo, template) %>% summarise(Different_genes_all = n()) %>% ungroup() %>%
group_by(Tipo,database) %>% summarise(Different_genes_all_1 = n()) %>%
ggplot(aes(x=Tipo,y=Different_genes_all_1,fill=database)) + geom_col()
## `summarise()` has grouped output by 'database', 'Tipo'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Tipo'. You can override using the
## `.groups` argument.

Mean of distinct
genes by origin
Tabla_inicial %>% group_by(database,Tipo, template) %>% summarise(Different_genes_all = n()) %>% ungroup() %>%
group_by(Tipo,database) %>% summarise(Different_genes_all_1 = mean(Different_genes_all)) %>%
ggplot(aes(x=Tipo,y=Different_genes_all_1,fill=database)) + geom_col()
## `summarise()` has grouped output by 'database', 'Tipo'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Tipo'. You can override using the
## `.groups` argument.

Proportion unique
genes
Tabla_inicial %>% group_by(database,Tipo, template) %>% summarise(Different_genes_all = n()) %>% ungroup() %>%
group_by(Tipo,database) %>% summarise(Different_genes_all_1 = n()) %>%
ggplot(aes(x=Tipo,y=Different_genes_all_1,fill=database)) +geom_col(position = "fill")
## `summarise()` has grouped output by 'database', 'Tipo'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Tipo'. You can override using the
## `.groups` argument.

Proportion unique
genes per person
Tabla_inicial %>% group_by(database,Tipo, template, dataset) %>% summarise(Different_genes_all = n()) %>% ggplot(aes(x=dataset,y=Different_genes_all,fill=database)) + geom_col(position = "fill")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## `summarise()` has grouped output by 'database', 'Tipo', 'template'. You can
## override using the `.groups` argument.
