Pre-Analisis

Cargamos las librerias necesarias para el análisis.

setwd("/storage/PepeMolina2/")
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(ggsci)
library(haven)
library(ggstatsplot)

## You can cite this package as:
##      Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
##      Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167

library(breakaway)
library(DivNet)

## Loading required package: phyloseq

library(uwot)

## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

library(igraph)

## 
## Attaching package: 'igraph'
## 
## The following object is masked from 'package:ggstatsplot':
## 
##     %<-%
## 
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## 
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## 
## The following object is masked from 'package:tidyr':
## 
##     crossing
## 
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## 
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## 
## The following object is masked from 'package:base':
## 
##     union

library(DESeq2)

## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:igraph':
## 
##     normalize, path, union
## 
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## 
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## 
## 
## Attaching package: 'S4Vectors'
## 
## The following objects are masked from 'package:Matrix':
## 
##     expand, unname
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## 
## The following object is masked from 'package:phyloseq':
## 
##     distance
## 
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## The following object is masked from 'package:purrr':
## 
##     reduce
## 
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## 
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## 
## Attaching package: 'MatrixGenerics'
## 
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## 
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## 
## Attaching package: 'Biobase'
## 
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## 
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## 
## The following object is masked from 'package:phyloseq':
## 
##     sampleNames

library(ggrepel)
library(vegan)

## Loading required package: permute
## 
## Attaching package: 'permute'
## 
## The following object is masked from 'package:igraph':
## 
##     permute
## 
## Loading required package: lattice
## This is vegan 2.6-2
## 
## Attaching package: 'vegan'
## 
## The following object is masked from 'package:igraph':
## 
##     diversity

library(tidygraph)

## 
## Attaching package: 'tidygraph'
## 
## The following objects are masked from 'package:IRanges':
## 
##     active, slice
## 
## The following objects are masked from 'package:S4Vectors':
## 
##     active, rename
## 
## The following object is masked from 'package:igraph':
## 
##     groups
## 
## The following object is masked from 'package:breakaway':
## 
##     convert
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(ggraph)
library(patchwork)

source("/storage/megalodon/R/import_kraken.R")
source("/storage/megalodon/R/kraken_heat_tree.R")

Carga de datos y preparación del Metadata

Los datos se procesaron usando Kraken2 frente a la base de datos Maxikraken (130Gb) que nos ha permitido reducir significativamente la cantidad de reads no identificados.

files = dir(".",pattern = "mic.kreport",full.names = T)

# metadata <- haven::read_dta("Tripobiome datos originales.dta")
# metadata <- metadata %>% dplyr::rename(Sample = ID_UCA_GT) %>% zap_labels()
# metadata <- metadata %>% mutate(Sample = paste0("S",Sample))
# metadata <- as.data.frame(metadata)
# 
# met2 <- readxl::read_xlsx("Tripobiome_Datos etiquetas_renamed.xlsx")
# 
# metadata <- inner_join(
#   metadata %>% select(Sample,id_paciente),
#   met2,
#   by = c("id_paciente" = "Codigo")
# )

metadata <- read_csv("Metadata.csv")

## Rows: 80 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (34): Sample, id_paciente, DAG, Consentimiento, 18anyos, area_endemica,...
## dttm  (3): fecha_visita, fecha_ci, fecha_nacimiento
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

metadata <- metadata %>% mutate(tto_parasiticida_correcto = ifelse(is.na(tto_parasiticida_correcto),"Control",tto_parasiticida_correcto)) %>% 
  mutate(bristol = ifelse(is.na(bristol),"NA",bristol)) %>% 
  mutate(sexo = ifelse(is.na(sexo),"NA",sexo))

Importamos los archivos kreport de Kraken2

tabla <- import_kraken(files,threads = 20)

## Loading required package: doParallel

## Loading required package: foreach

## 
## Attaching package: 'foreach'

## The following objects are masked from 'package:purrr':
## 
##     accumulate, when

## Loading required package: iterators

## Loading required package: parallel

tabla <- tabla %>% mutate(Sample = gsub(".mic.kreport","",Sample))

tabla2 <- tabla %>% inner_join(metadata)

## Joining, by = "Sample"

Estadistica descriptiva de los resultados de Kraken2

tabla2 %>% filter(TaxRank %in% c("U","D")) %>%
  ggplot(aes(x=Sample,y=Reads, fill = Name2)) + geom_col(position = "stack") + coord_flip() + scale_fill_d3() + facet_wrap(~ GRUPO,scales = "free")

Representación de las 15 familias mas abundantes

tabla2 %>%
  filter(grepl("Bacteria",Fullname)) %>%
  filter(grepl("F",TaxRank)) %>%
  group_by(NCBI) %>% 
  filter(!is.na(GRUPO)) %>% 
  summarise(Sum = sum(Prop)) %>%
  arrange(desc(Sum)) %>%
  mutate(R = row_number()) %>%
  inner_join(tabla2) %>%
  filter(R <15) %>%
  ggplot(aes(x=Sample,y=Prop, fill=Name2)) +
  geom_col(position = "fill") +
  scale_fill_d3(palette = "category20") + facet_wrap(.~ GRUPO, scales = "free")+ theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))

## Joining, by = "NCBI"

Representación de las 15 clases mas abundantes

tabla2 %>%
  filter(grepl("Bacteria",Fullname)) %>%
  filter(grepl("C",TaxRank)) %>%
  group_by(NCBI) %>% 
  filter(!is.na(GRUPO)) %>% 
  summarise(Sum = sum(Prop)) %>%
  arrange(desc(Sum)) %>%
  mutate(R = row_number()) %>%
  inner_join(tabla2) %>%
  filter(R <15) %>%
  ggplot(aes(x=Sample,y=Prop, fill=Name2)) +
  geom_col(position = "fill") +
  scale_fill_d3(palette = "category20") + facet_wrap(.~ GRUPO, scales = "free")+ theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))

## Joining, by = "NCBI"

Representación de los 15 generos mas abundantes

tabla2 %>%
  filter(grepl("Bacteria",Fullname)) %>%
  filter(grepl("G",TaxRank)) %>%
  group_by(NCBI) %>% 
  filter(!is.na(GRUPO)) %>% 
  summarise(Sum = sum(Prop)) %>%
  arrange(desc(Sum)) %>%
  mutate(R = row_number()) %>%
  inner_join(tabla2) %>%
  filter(R <15) %>%
  ggplot(aes(x=Sample,y=Prop, fill=Name2)) +
  geom_col(position = "fill") +
  scale_fill_d3(palette = "category20") + facet_wrap(.~ GRUPO, scales = "free")+ theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))

## Joining, by = "NCBI"

Alpha Diversidad

Calculo de la alpha-Diversidad (Shannon y Simpson). Para aumentar la fiabilidad del test los datos fueron rarificados para eliminar las diferencias de diversidad ocasionadas por las distintas profundidades de secuenciación.

 dn <- tabla2 %>%
   filter(grepl("Bacteria",Fullname)) %>%
   filter(grepl("S",TaxRank)) %>%
   select(NCBI,Reads,Sample) %>% 
   distinct() %>% 
   pivot_wider(names_from = Sample, values_from = Reads, values_fill = 0) %>%
   column_to_rownames("NCBI") 

dn <- rrarefy(dn, sample = min(colSums(dn)))

shannon <- diversity(t(dn)) %>% as.data.frame()
simpson <- diversity(t(dn), index = "simpson") %>% as.data.frame()

Análisis estadisticos de la Alpha Diversidad usando el test estadisto robust t-test

shannon %>%
  rename(estimate = 1) %>%
  rownames_to_column("Sample") %>% 
  inner_join(tabla2) %>% select(GRUPO, Sample, estimate) %>% distinct() %>%
  filter(!is.na(GRUPO)) %>% ggbetweenstats(x = GRUPO, y = estimate, title = "Shannon Alpha Diversity", type = "robust") + theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))

## Joining, by = "Sample"

simpson %>%
  rename(estimate = 1) %>%
  rownames_to_column("Sample") %>% 
  inner_join(tabla2) %>% select(GRUPO, Sample, estimate) %>% distinct() %>%
  filter(!is.na(GRUPO)) %>% ggbetweenstats(x = GRUPO, y = estimate, title = "Simpson Alpha Diversity", type = "robust") + theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))

## Joining, by = "Sample"

Beta diversity

Analsis de la beta diversidad. Usamos UMAP para la reducción dimensional sobre la distancia bray-curtis y el test ADONIS para el analisis estadistico de las agrupaciones por GRUPO.

dn <- tabla2 %>%
   filter(grepl("Bacteria",Fullname)) %>%
   select(NCBI,Reads,Sample) %>% 
   distinct() %>% 
   pivot_wider(names_from = Sample, values_from = Reads, values_fill = 0) %>%
   column_to_rownames("NCBI") 

dn <- t(dn)
bc <- avgdist(dn, sample = min(colSums(dn))) %>% as.matrix()

um <- umap(bc) %>% as.data.frame() %>% rename(X =1, Y =2)
rownames(um) <- colnames(bc)
um %>%
  rownames_to_column("Sample") %>%
  left_join(metadata) %>% filter(!is.na(GRUPO)) %>% 
  ggplot(aes(x=X, y=Y, fill = GRUPO)) + geom_label(aes(label = Sample)) + scale_fill_d3()

## Joining, by = "Sample"

data <- metadata %>% inner_join(tabla2 %>% filter(NCBI ==2) %>% select(Sample,Reads)) %>% column_to_rownames("Sample")

## Joining, by = "Sample"

## Insertar Edad
ado <- adonis2(dn ~ tto_parasiticida_correcto+bristol+GRUPO+sexo+Reads,data)
ado

PCoA y betadisper

beta_disper_grupo <- betadisper(bc %>% as.dist(),group = metadata$GRUPO)
beta_disper_grupo

## 
##  Homogeneity of multivariate dispersions
## 
## Call: betadisper(d = bc %>% as.dist(), group = metadata$GRUPO)
## 
## No. of Positive Eigenvalues: 79
## No. of Negative Eigenvalues: 0
## 
## Average distance to median:
##                       Control no infectado 
##                                     0.6603 
##  Infección crónica con afectación cardiaca 
##                                     0.6550 
## Infección crónica con afectación digestiva 
##                                     0.6510 
##            Infección crónica indeterminada 
##                                     0.6623 
## 
## Eigenvalues for PCoA axes:
## (Showing 8 of 79 eigenvalues)
##  PCoA1  PCoA2  PCoA3  PCoA4  PCoA5  PCoA6  PCoA7  PCoA8 
## 0.8480 0.7990 0.7657 0.7508 0.7334 0.7194 0.7083 0.6951

plot(beta_disper_grupo)

beta_disper_tto <- betadisper(bc %>% as.dist(),group = metadata$tto_parasiticida_correcto)
beta_disper_tto

## 
##  Homogeneity of multivariate dispersions
## 
## Call: betadisper(d = bc %>% as.dist(), group =
## metadata$tto_parasiticida_correcto)
## 
## No. of Positive Eigenvalues: 79
## No. of Negative Eigenvalues: 0
## 
## Average distance to median:
## Control      No      Si 
##  0.6604  0.6638  0.6635 
## 
## Eigenvalues for PCoA axes:
## (Showing 8 of 79 eigenvalues)
##  PCoA1  PCoA2  PCoA3  PCoA4  PCoA5  PCoA6  PCoA7  PCoA8 
## 0.8480 0.7990 0.7657 0.7508 0.7334 0.7194 0.7083 0.6951

plot(beta_disper_tto)

### heat-map trees

Análisis diferencial de abundancia en la variable GRUPO

Para el análisis diferencial hemos usado el test DESeq2 sobre los datos normalizados por rarefaction. No siempre es necesario estre proceso pero debido a la diferencia de profundidad de algunas muestras hemos preferido un enfoque mas conservador.

El test DESeq2 usa: “Differential abundance analysis based on the Negative Binomial (a.k.a. Gamma-Poisson) distribution”

preparación de los datos en los formatos necesarios.

table_otu <- tabla2 %>%
  dplyr::filter(grepl("Bacteria",Fullname)) %>%
  dplyr::select(NCBI,Reads,Sample) %>%
  distinct() %>% 
  pivot_wider(names_from = Sample, values_from = Reads, values_fill = 0) %>%
  column_to_rownames("NCBI") %>% as.matrix()

table_otu <- rrarefy(table_otu, sample = min(colSums(table_otu)))

cdata <- metadata %>% as.data.frame() %>% 
  column_to_rownames("Sample") %>%
  filter(!is.na(GRUPO)) %>%
  mutate(GRUPO = gsub(" ","_",GRUPO))


countd <- table_otu[,rownames(cdata)]
dds <- DESeqDataSetFromMatrix(countData = countd,
                              colData = cdata,
                              design = ~ GRUPO)

##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]

Eliminamos aquellos taxones con una abundancia total menor de 10.

keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]

Realizamos el test estadistico propiamente.

dds <- DESeq(dds, fitType = "local")

## estimating size factors

##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]

## final dispersion estimates

##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]

## fitting model and testing

##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]

## -- replacing outliers and refitting for 1950 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]

## fitting model and testing

##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]

normalized_counts <-counts(dds, normalized=TRUE) %>% as.data.frame() %>% rownames_to_column("NCBI") %>% pivot_longer(names_to = "Sample", values_to = "NormAbundance",-NCBI)

Visualization de los resultados del analisis diferencial de abundancias.

taxtable <- tabla2 %>% select(NCBI, TaxRank,Name2) %>% distinct() %>% mutate(NCBI = as.character(NCBI))
tax_network <- tabla2 %>% select(FullNCBI) %>% distinct()

Infección crónica indeterminada vs Control no infectado

Volcano Plot con las taxones con p-value ajustado menor a 0.001

res_c_ici <- results(dds) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))

## Joining, by = "NCBI"

res_c_ici %>% ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + labs(title = "Infección crónica indeterminada vs Control no infectado")

Visualización individual de los taxones significativos.

results(dds) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
  filter(sig =="Significative") %>%
  filter(!is.na(GRUPO)) %>%
  inner_join(normalized_counts) %>%
  filter(GRUPO %in% c("Infección crónica con afectación cardiaca","Control no infectado")) %>%
  ggplot(aes(x = GRUPO, y = NormAbundance, fill = GRUPO)) +
  geom_violin() +
  facet_grid(~Name2, scales = "free") +
  scale_fill_d3() +
  scale_y_log10() +
  theme_light() +
  labs(title = "Infección crónica indeterminada vs Control no infectado")+ theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))

## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")

Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y su taxonomia asociada.

kraken_heat_tree(results(dds) %>% as.data.frame(),tabla2,0.05)

## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout

Infección_crónica_con_afectación_cardiaca vs Control_no_infectado

Volcano Plot con las taxones con p-value ajustado menor a 0.001

res_c_icac <- results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_cardiaca","Control_no_infectado")) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))

## Joining, by = "NCBI"

res_c_icac %>% ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light()+
  labs(title = "Infección crónica con afectación cardiaca vs Control no infectado")

Visualización individual de los taxones significativos.

results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_cardiaca","Control_no_infectado")) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
  filter(sig =="Significative") %>%
  filter(!is.na(GRUPO)) %>%
  inner_join(normalized_counts) %>%
  filter(GRUPO %in% c("Infección crónica con afectación cardiaca","Control no infectado")) %>%
  ggplot(aes(x = GRUPO, y = NormAbundance, fill = GRUPO)) +
  geom_violin() +
  facet_grid(~Name2, scales = "free") + scale_fill_d3() + scale_y_log10() + theme_light() +
  labs(title = "Infección crónica con afectación cardiaca vs Control no infectado")+ 
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))

## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")

Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y su taxonomia asociada.

kraken_heat_tree(results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_cardiaca","Control_no_infectado")) %>% as.data.frame(),tabla2,0.05)

## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout

Infección_crónica_con_afectación_digestiva vs Control_no_infectado

Volcano Plot con las taxones con p-value ajustado menor a 0.001

res_c_icad <- results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_digestiva","Control_no_infectado")) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))

## Joining, by = "NCBI"

res_c_icad %>%
  filter(!is.na(padj)) %>%
  ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + labs(title = "Infección crónica con afectación digestiva vs Control no infectado" )

Visualización individual de los taxones significativos.

results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_digestiva","Control_no_infectado")) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
  filter(sig =="Significative") %>%
  filter(!is.na(GRUPO)) %>%
  inner_join(normalized_counts) %>%
  filter(GRUPO %in% c("Infección crónica con afectación digestiva","Control no infectado")) %>%
  ggplot(aes(x = GRUPO, y = NormAbundance, fill = GRUPO)) +
  geom_violin() +
  facet_grid(~Name2, scales = "free")  + 
  scale_fill_d3() + 
  scale_y_log10()+ 
  theme_light() +
  labs(title = "Infección crónica con afectación digestiva vs Control no infectado" ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")

Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y su taxonomia asociada.

kraken_heat_tree(results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_digestiva","Control_no_infectado")) %>% as.data.frame(),tabla2,0.05)

## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout

Heatmap

Heat map con todos los genes significativos de todos los contrastes anteriores.

signi <- bind_rows(res_c_icac,res_c_icad) %>% bind_rows(res_c_ici) %>% filter(sig =="Significative")

annot <- tabla2 %>% select(Sample,GRUPO,sexo) %>% distinct() %>% column_to_rownames("Sample")

tabla2 %>% as_tibble() %>% 
  mutate(NCBI = as.character(NCBI)) %>% 
  semi_join(signi, by ="NCBI") %>% 
  inner_join(normalized_counts %>% mutate(NCBI = NCBI)) %>% 
  group_by(Sample,Name2) %>% 
  summarise(NormAbundance = log10(max(NormAbundance))) %>% 
  ungroup() %>% 
  pivot_wider(names_from = Name2, values_from = NormAbundance,values_fill = 0) %>% 
  column_to_rownames("Sample") %>% 
  as.matrix() %>% 
  pheatmap::pheatmap(annotation_row = annot)

## Joining, by = c("NCBI", "Sample")
## `summarise()` has grouped output by 'Sample'. You can override using the
## `.groups` argument.

Análisis diferencial de abundancia en la variable Tratamiento

cdata <- metadata %>% 
  as.data.frame() %>% 
  column_to_rownames("Sample")


countd <- table_otu[,rownames(cdata)]
dds <- DESeqDataSetFromMatrix(countData = countd,
                              colData = cdata,
                              design = ~ tto_parasiticida_correcto)

Eliminamos aquellos taxones con una abundancia total menor de 10.

keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]

Realizamos el test estadistico propiamente.

dds <- DESeq(dds, fitType = "local")

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 2103 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

normalized_counts <-counts(dds, normalized=TRUE) %>% as.data.frame() %>% rownames_to_column("NCBI") %>% pivot_longer(names_to = "Sample", values_to = "NormAbundance",-NCBI)

tto parasiticida correcto Si vs Control

Volcano Plot con las taxones con p-value ajustado menor a 0.001

tto_si_vs_control <- results(dds) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))

## Joining, by = "NCBI"

tto_si_vs_control %>% ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + labs(title = "tto parasiticida correcto Si vs Control")

Visualización individual de los taxones significativos.

results(dds) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
  filter(sig =="Significative") %>%
  inner_join(normalized_counts) %>%
  filter(tto_parasiticida_correcto %in% c("Si","Control")) %>%
  ggplot(aes(x = tto_parasiticida_correcto, y = NormAbundance, fill = tto_parasiticida_correcto)) +
  geom_violin() +
  facet_grid(~Name2, scales = "free") +
  scale_fill_d3() +
  scale_y_log10() +
  theme_light() +
  labs(title = "tto parasiticida correcto Si vs Control")+ 
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))

## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")

Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y su taxonomia asociada.

kraken_heat_tree(results(dds) %>% as.data.frame(),tabla2,0.05)

## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout

tto parasiticida correcto No vs Control

Volcano Plot con las taxones con p-value ajustado menor a 0.001

tto_no_vs_control <- results(dds, contrast = c("tto_parasiticida_correcto","No","Control")) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))

## Joining, by = "NCBI"

tto_no_vs_control %>% ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + labs(title = "tto parasiticida correcto No vs Control")

Visualización individual de los taxones significativos.

results(dds,contrast = c("tto_parasiticida_correcto","No","Control")) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
  filter(sig =="Significative") %>%
  inner_join(normalized_counts) %>%
  filter(tto_parasiticida_correcto %in% c("No","Control")) %>%
  ggplot(aes(x = tto_parasiticida_correcto, y = NormAbundance, fill = tto_parasiticida_correcto)) +
  geom_violin() +
  facet_grid(~Name2, scales = "free") +
  scale_fill_d3() +
  scale_y_log10() +
  theme_light() +
  labs(title = "tto parasiticida correcto No vs Control")+ 
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))

## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")

Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y su taxonomia asociada.

kraken_heat_tree(results(dds,contrast = c("tto_parasiticida_correcto","No","Control")) %>% as.data.frame(),tabla2,0.05)

## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout

tto parasiticida correcto No vs Si

Volcano Plot con las taxones con p-value ajustado menor a 0.001

tto_no_vs_si <- results(dds, contrast = c("tto_parasiticida_correcto","No","Si")) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))

## Joining, by = "NCBI"

tto_no_vs_si %>% ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
  geom_point(alpha = 0.5) +
  geom_text_repel(size = 4)+
  scale_color_d3() +
  theme_light() + labs(title = "tto parasiticida correcto No vs Si")

Visualización individual de los taxones significativos.

results(dds,contrast = c("tto_parasiticida_correcto","No","Si")) %>%
  as.data.frame() %>%
  mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
  rownames_to_column("NCBI") %>%
  inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
  filter(sig =="Significative") %>%
  inner_join(normalized_counts) %>%
  filter(tto_parasiticida_correcto %in% c("No","Si")) %>%
  ggplot(aes(x = tto_parasiticida_correcto, y = NormAbundance, fill = tto_parasiticida_correcto)) +
  geom_violin() +
  facet_grid(~Name2, scales = "free") +
  scale_fill_d3() +
  scale_y_log10() +
  theme_light() +
  labs(title = "tto parasiticida correcto No vs Si")+ 
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))

## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")

Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y su taxonomia asociada.

kraken_heat_tree(results(dds,contrast = c("tto_parasiticida_correcto","No","Si")) %>% as.data.frame(),tabla2,0.05)

## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout

Analisis Metagenómico del proyecto TRIPOBIOME

Pre-Analisis

Carga de datos y preparación del Metadata

Alpha Diversidad

Beta diversity

PCoA y betadisper

Análisis diferencial de abundancia en la variable GRUPO

Visualization de los resultados del analisis diferencial de abundancias.

Infección crónica indeterminada vs Control no infectado

Infección_crónica_con_afectación_cardiaca vs Control_no_infectado

Infección_crónica_con_afectación_digestiva vs Control_no_infectado

Heatmap

Análisis diferencial de abundancia en la variable Tratamiento

tto parasiticida correcto Si vs Control

tto parasiticida correcto No vs Control

tto parasiticida correcto No vs Si