1 Load Rarefaction

## [1] 366155

2 Proporcion por semana

Tabla_inicial_info %>% filter(Tipo == 1)  %>%  group_by(Semana, database) %>% summarise(Different_genes_all = n()) %>% ggplot(aes(x=Semana,y=Different_genes_all,fill=database)) + geom_col(position = "fill")+ 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(),
        legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1), axis.title.x = element_blank(), axis.title.y =  element_blank() )
## `summarise()` has grouped output by 'Semana'. You can override using the
## `.groups` argument.

3 diversidad

3.1 tablas

Tabla_gral <- Tabla_inicial %>% select(template, depth, dataset) %>% spread(template,depth, fill = 0) %>%  
  column_to_rownames("dataset") %>% t(.) %>% otu_table(taxa_are_rows = T)
Tabla_ABR <- Tabla_inicial %>% filter (database=="ABR") %>%  select(template, depth, dataset) %>% spread(template,depth, fill = 0) %>%  
  column_to_rownames("dataset") %>% t(.) %>% otu_table(taxa_are_rows = T)
Tabla_BAC <- Tabla_inicial %>% filter (database=="BAC") %>%  select(template, depth, dataset) %>% spread(template,depth, fill = 0) %>%  
  column_to_rownames("dataset") %>% t(.) %>% otu_table(taxa_are_rows = T)
Tabla_REL <- Tabla_inicial %>% filter (database=="REL") %>%  select(template, depth, dataset) %>% spread(template,depth, fill = 0) %>%  
  column_to_rownames("dataset") %>% t(.) %>% otu_table(taxa_are_rows = T)

diver_gral <- alpha(Tabla_gral, index = c("Shannon", "Chao1", "Simpson"))  %>% 
  rename(shannon_gral= "diversity_shannon", chao_gral = 'chao1', dominance.simpson_gral = 'dominance_simpson', evenness.simpson_gral = 'evenness_simpson' ) %>% 
  rownames_to_column(.,var="dataset") 
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
diver_gral <- alpha(Tabla_ABR, index = c("Shannon", "Chao1", "Simpson")) %>% 
  rename(shannon_ABR= "diversity_shannon", chao_ABR = 'chao1', dominance.simpson_ABR = 'dominance_simpson', evenness.simpson_ABR = 'evenness_simpson' ) %>% 
  rownames_to_column(.,var="dataset")  %>%  left_join(diver_gral,., by="dataset")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
diver_gral <- alpha(Tabla_BAC, index = c("Shannon", "Chao1", "Simpson")) %>% 
  rename(shannon_BAC= "diversity_shannon", chao_BAC = 'chao1', dominance.simpson_BAC = 'dominance_simpson', evenness.simpson_BAC = 'evenness_simpson' ) %>% 
  rownames_to_column(.,var="dataset")  %>%  left_join(diver_gral,., by="dataset")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
diver_gral <- alpha(Tabla_REL, index = c("Shannon", "Chao1", "Simpson")) %>% 
  rename(shannon_REL= "diversity_shannon", chao_REL = 'chao1', dominance.simpson_REL = 'dominance_simpson', evenness.simpson_REL = 'evenness_simpson' ) %>% 
  rownames_to_column(.,var="dataset")  %>%  left_join(diver_gral,., by="dataset")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
tmp_treat_notreat <- Tabla_inicial %>% select(dataset, Tipo) %>% distinct()
diver_gral <- left_join(diver_gral, tmp_treat_notreat)
## Joining with `by = join_by(dataset)`
diver_gral <- diver_gral %>%
  pivot_longer(cols=chao_gral:dominance.simpson_REL, names_to = "cosa", values_to = "value") %>% 
  separate(cosa, into=c("index", "database2"), sep="_")  

shapiro <- diver_gral %>%
  group_by(database2, index, Tipo) %>%
  do(tidy(shapiro.test(.$value)))
datatable(shapiro, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )
wilcox <- diver_gral %>% 
  group_by(database2, index) %>% 
  do(tidy(wilcox.test(.$value~.$Tipo)))
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
datatable(wilcox, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )
student <- diver_gral %>%
  group_by(database2, index) %>%
  do(tidy(t.test(.$value~.$Tipo)))
datatable(student, rownames = FALSE, filter="top",  options = list(pageLength = 30, scrollX=T, scrollY=T) )

3.2 violin

diver_gral %>% filter(index== "shannon") %>% 
  ggplot(aes(x=Tipo, y = value,fill=Tipo)) +
  geom_violin()+
  geom_boxplot(width=.2) +
  facet_wrap(.~database2)+
  ggtitle ("Shannon diversity") +xlab("Cohort origin") +ylab("Shannon Index")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1, legend.position = "none",strip.background = element_blank(), 
        axis.text.x = element_text(angle = 45,  hjust=1))

4 PCA

Guayana_spread<- Tabla_inicial  %>%  select(template,depth,dataset) %>% group_by(template,dataset) %>% 
  summarise(depth = max(depth)) %>% distinct(template, dataset, .keep_all = T) %>%  spread(template,depth, fill = 0)%>% column_to_rownames(var = "dataset")
## `summarise()` has grouped output by 'template'. You can override using the
## `.groups` argument.
Guayana_spread_BAC<- Tabla_inicial %>% filter(database=="BAC") %>% select(template,depth,dataset) %>% group_by(template,dataset) %>% 
  summarise(depth = max(depth)) %>% distinct(template, dataset, .keep_all = T) %>%  spread(template,depth, fill = 0)%>% 
  column_to_rownames(var = "dataset")
## `summarise()` has grouped output by 'template'. You can override using the
## `.groups` argument.
Guayana_spread_ABR<- Tabla_inicial %>% filter(database=="ABR") %>% select(template,depth,dataset) %>% group_by(template,dataset) %>% 
  summarise(depth = max(depth)) %>% distinct(template, dataset, .keep_all = T) %>%  spread(template,depth, fill = 0)%>% 
  column_to_rownames(var = "dataset")
## `summarise()` has grouped output by 'template'. You can override using the
## `.groups` argument.
Guayana_spread_REL<- Tabla_inicial %>% filter(database=="REL") %>% select(template,depth,dataset) %>% group_by(template,dataset)%>% summarise(depth = max(depth)) %>% distinct(template, dataset, .keep_all = T) %>%  spread(template,depth, fill = 0)%>%
  column_to_rownames(var = "dataset")
## `summarise()` has grouped output by 'template'. You can override using the
## `.groups` argument.
Guayana_tsne_spearman = get_dist(Guayana_spread, method = "spearman")
Guayana_tsne_spearman_BAC = get_dist(Guayana_spread_BAC, method = "spearman")
Guayana_tsne_spearman_REL = get_dist(Guayana_spread_REL, method = "spearman")
Guayana_tsne_spearman_ABR = get_dist(Guayana_spread_ABR, method = "spearman")

PCA <- prcomp(Guayana_tsne_spearman)
PCA_BAC <- prcomp(Guayana_tsne_spearman_BAC)
PCA_ABR <- prcomp(Guayana_tsne_spearman_ABR)
PCA_REL <- prcomp(Guayana_tsne_spearman_REL)

5 PCA 0/1

as.data.frame(PCA$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
  select(dataset,PC1,PC2,Tipo) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = Tipo)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 66%") +ylab("PC2 9%")
## Joining with `by = join_by(dataset)`

6 PCA 0/1 ABR

as.data.frame(PCA_ABR$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
  select(dataset,PC1,PC2,Tipo) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = Tipo)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 22.8%") +ylab("PC2 18%")
## Joining with `by = join_by(dataset)`

7 PCA 0/1 BAC

as.data.frame(PCA_BAC$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
  select(dataset,PC1,PC2,Tipo) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = Tipo)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 22.8%") +ylab("PC2 18%")
## Joining with `by = join_by(dataset)`

8 PCA 0/1 REL

as.data.frame(PCA_REL$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
  select(dataset,PC1,PC2,Tipo) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = Tipo)) + geom_point() +
  ggtitle ("Principal Component Analysis") +xlab("PC1 22.8%") +ylab("PC2 18%")
## Joining with `by = join_by(dataset)`

9 PCA Semana

as.data.frame(PCA$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
  select(dataset,PC1,PC2,Semana) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = as.numeric(Semana))) + geom_point() +
  scale_color_gradient(low='yellow', high = 'black')+
  ggtitle ("Principal Component Analysis") +xlab("PC1 66%") +ylab("PC2 9%")
## Joining with `by = join_by(dataset)`

10 PCA Semana ABR

as.data.frame(PCA_ABR$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
  select(dataset,PC1,PC2,Semana) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = as.numeric(Semana))) + geom_point() +
  scale_color_gradient(low='yellow', high = 'black')+
  ggtitle ("Principal Component Analysis") +xlab("PC1 66%") +ylab("PC2 9%")
## Joining with `by = join_by(dataset)`

11 PCA Semana BAC

as.data.frame(PCA_BAC$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
  select(dataset,PC1,PC2,Semana) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = as.numeric(Semana))) + geom_point() +
  scale_color_gradient(low='yellow', high = 'black')+
  ggtitle ("Principal Component Analysis") +xlab("PC1 66%") +ylab("PC2 9%")
## Joining with `by = join_by(dataset)`

12 PCA Semana REL

as.data.frame(PCA_REL$x[,1:2]) %>% rownames_to_column("dataset") %>% inner_join(Tabla_inicial) %>%
  select(dataset,PC1,PC2,Semana) %>% distinct() %>% ggplot(aes(x = PC1, y = PC2, color = as.numeric(Semana))) + geom_point() +
  scale_color_gradient(low='yellow', high = 'black')+
  ggtitle ("Principal Component Analysis") +xlab("PC1 66%") +ylab("PC2 9%")
## Joining with `by = join_by(dataset)`

13 UMAP

Guayana_tsne_spearman <- as.matrix(Guayana_tsne_spearman)
Guayana_tsne_spearman_REL <- as.matrix(Guayana_tsne_spearman_REL)
Guayana_tsne_spearman_ABR <- as.matrix(Guayana_tsne_spearman_ABR)
Guayana_tsne_spearman_BAC <- as.matrix(Guayana_tsne_spearman_BAC)

umap_plot <- umap(Guayana_tsne_spearman, input="dist")
umap_plot_REL <- umap(Guayana_tsne_spearman_REL)
umap_plot_ABR <- umap(Guayana_tsne_spearman_ABR)
umap_plot_BAC <- umap(Guayana_tsne_spearman_BAC)

umap_plot <- as.data.frame(umap_plot$layout) %>% rownames_to_column("dataset") %>% inner_join(., Tabla_inicial)
## Joining with `by = join_by(dataset)`
umap_plot_REL <- as.data.frame(umap_plot_REL$layout) %>% rownames_to_column("dataset") %>% inner_join(., Tabla_inicial)
## Joining with `by = join_by(dataset)`
umap_plot_ABR <- as.data.frame(umap_plot_ABR$layout) %>% rownames_to_column("dataset") %>% inner_join(., Tabla_inicial)
## Joining with `by = join_by(dataset)`
umap_plot_BAC <- as.data.frame(umap_plot_BAC$layout) %>% rownames_to_column("dataset") %>% inner_join(., Tabla_inicial)
## Joining with `by = join_by(dataset)`

14 UMAP semana

umap_plot %>%
  select(dataset,V1,V2,Semana) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = as.numeric(Semana))) + geom_point()+
  scale_color_gradient(low='blue', high = 'red')+
  ggtitle ("UMAP")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1,strip.background = element_blank(), 
        axis.text.x = element_text(angle = 0,  hjust=1))

15 UMAP semana ABR

umap_plot_ABR %>%
  select(dataset,V1,V2,Semana) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = as.numeric(Semana))) + geom_point()+
  scale_color_gradient(low='yellow', high = 'black')+
  ggtitle ("UMAP")

16 UMAP semana BAC

umap_plot_BAC %>%
  select(dataset,V1,V2,Semana) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = as.numeric(Semana))) + geom_point()+
  scale_color_gradient(low='yellow', high = 'black')+
  ggtitle ("UMAP")

17 UMAP semana REL

umap_plot_REL %>%
  select(dataset,V1,V2,Semana) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = as.numeric(Semana))) + geom_point()+
  scale_color_gradient(low='yellow', high = 'black')+
  ggtitle ("UMAP")

18 UMAP 0/1

umap_plot %>%
  select(dataset,V1,V2,Tipo) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = Tipo)) + geom_point()+
  ggtitle ("UMAP")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1,strip.background = element_blank(), 
        axis.text.x = element_text(angle = 0,  hjust=1))

19 UMAP 0/1 ABR

umap_plot_ABR %>%
  select(dataset,V1,V2,Tipo) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = Tipo)) + geom_point()+
  ggtitle ("UMAP")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1,strip.background = element_blank(), 
        axis.text.x = element_text(angle = 0,  hjust=1))

20 UMAP 0/1 BAC

umap_plot_BAC %>%
  select(dataset,V1,V2,Tipo) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = Tipo)) + geom_point()+
  ggtitle ("UMAP")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1,strip.background = element_blank(), 
        axis.text.x = element_text(angle = 0,  hjust=1))

21 UMAP 0/1 REL

umap_plot_REL %>%
  select(dataset,V1,V2,Tipo) %>% distinct() %>% ggplot(aes(x = V1, y =V2, color = Tipo)) + geom_point()+
  ggtitle ("UMAP")+
  theme_classic()+
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        aspect.ratio = 1,strip.background = element_blank(), 
        axis.text.x = element_text(angle = 0,  hjust=1))

22 STATS

22.1 Quantity of distinct genes by origin

Tabla_inicial  %>%  group_by(database,Tipo, template) %>% summarise(Different_genes_all = n()) %>% ungroup() %>% 
  group_by(Tipo,database) %>% summarise(Different_genes_all_1 = n()) %>%
  ggplot(aes(x=Tipo,y=Different_genes_all_1,fill=database)) + geom_col()
## `summarise()` has grouped output by 'database', 'Tipo'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Tipo'. You can override using the
## `.groups` argument.

22.2 Mean of distinct genes by origin

Tabla_inicial  %>%  group_by(database,Tipo, template) %>% summarise(Different_genes_all = n()) %>% ungroup() %>% 
  group_by(Tipo,database) %>% summarise(Different_genes_all_1 = mean(Different_genes_all)) %>%
  ggplot(aes(x=Tipo,y=Different_genes_all_1,fill=database)) + geom_col()
## `summarise()` has grouped output by 'database', 'Tipo'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Tipo'. You can override using the
## `.groups` argument.

22.3 Proportion unique genes

Tabla_inicial  %>%  group_by(database,Tipo, template) %>% summarise(Different_genes_all = n()) %>% ungroup() %>% 
  group_by(Tipo,database) %>% summarise(Different_genes_all_1 = n()) %>%
  ggplot(aes(x=Tipo,y=Different_genes_all_1,fill=database)) +geom_col(position = "fill")
## `summarise()` has grouped output by 'database', 'Tipo'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Tipo'. You can override using the
## `.groups` argument.

22.4 Proportion unique genes per person

Tabla_inicial %>%  group_by(database,Tipo, template, dataset) %>% summarise(Different_genes_all = n()) %>% ggplot(aes(x=dataset,y=Different_genes_all,fill=database)) + geom_col(position = "fill")+ 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## `summarise()` has grouped output by 'database', 'Tipo', 'template'. You can
## override using the `.groups` argument.