Cargamos las librerias necesarias para el análisis.
setwd("/storage/PepeMolina2/")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggsci)
library(haven)
library(ggstatsplot)
## You can cite this package as:
## Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
## Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
library(breakaway)
library(DivNet)
## Loading required package: phyloseq
library(uwot)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(igraph)
##
## Attaching package: 'igraph'
##
## The following object is masked from 'package:ggstatsplot':
##
## %<-%
##
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
##
## The following objects are masked from 'package:purrr':
##
## compose, simplify
##
## The following object is masked from 'package:tidyr':
##
## crossing
##
## The following object is masked from 'package:tibble':
##
## as_data_frame
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:igraph':
##
## normalize, path, union
##
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
##
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
##
##
## Attaching package: 'S4Vectors'
##
## The following objects are masked from 'package:Matrix':
##
## expand, unname
##
## The following objects are masked from 'package:dplyr':
##
## first, rename
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
##
## The following object is masked from 'package:phyloseq':
##
## distance
##
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## The following object is masked from 'package:purrr':
##
## reduce
##
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
##
## The following object is masked from 'package:dplyr':
##
## count
##
##
## Attaching package: 'MatrixGenerics'
##
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
##
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
##
## Attaching package: 'Biobase'
##
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
##
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
## The following object is masked from 'package:phyloseq':
##
## sampleNames
library(ggrepel)
library(vegan)
## Loading required package: permute
##
## Attaching package: 'permute'
##
## The following object is masked from 'package:igraph':
##
## permute
##
## Loading required package: lattice
## This is vegan 2.6-2
##
## Attaching package: 'vegan'
##
## The following object is masked from 'package:igraph':
##
## diversity
library(tidygraph)
##
## Attaching package: 'tidygraph'
##
## The following objects are masked from 'package:IRanges':
##
## active, slice
##
## The following objects are masked from 'package:S4Vectors':
##
## active, rename
##
## The following object is masked from 'package:igraph':
##
## groups
##
## The following object is masked from 'package:breakaway':
##
## convert
##
## The following object is masked from 'package:stats':
##
## filter
library(ggraph)
library(patchwork)
source("/storage/megalodon/R/import_kraken.R")
source("/storage/megalodon/R/kraken_heat_tree.R")
Los datos se procesaron usando Kraken2 frente a la base de datos Maxikraken (130Gb) que nos ha permitido reducir significativamente la cantidad de reads no identificados.
files = dir(".",pattern = "mic.kreport",full.names = T)
# metadata <- haven::read_dta("Tripobiome datos originales.dta")
# metadata <- metadata %>% dplyr::rename(Sample = ID_UCA_GT) %>% zap_labels()
# metadata <- metadata %>% mutate(Sample = paste0("S",Sample))
# metadata <- as.data.frame(metadata)
#
# met2 <- readxl::read_xlsx("Tripobiome_Datos etiquetas_renamed.xlsx")
#
# metadata <- inner_join(
# metadata %>% select(Sample,id_paciente),
# met2,
# by = c("id_paciente" = "Codigo")
# )
metadata <- read_csv("Metadata.csv")
## Rows: 80 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (34): Sample, id_paciente, DAG, Consentimiento, 18anyos, area_endemica,...
## dttm (3): fecha_visita, fecha_ci, fecha_nacimiento
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metadata <- metadata %>% mutate(tto_parasiticida_correcto = ifelse(is.na(tto_parasiticida_correcto),"Control",tto_parasiticida_correcto)) %>%
mutate(bristol = ifelse(is.na(bristol),"NA",bristol)) %>%
mutate(sexo = ifelse(is.na(sexo),"NA",sexo))
Importamos los archivos kreport de Kraken2
tabla <- import_kraken(files,threads = 20)
## Loading required package: doParallel
## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loading required package: iterators
## Loading required package: parallel
tabla <- tabla %>% mutate(Sample = gsub(".mic.kreport","",Sample))
tabla2 <- tabla %>% inner_join(metadata)
## Joining, by = "Sample"
Estadistica descriptiva de los resultados de Kraken2
tabla2 %>% filter(TaxRank %in% c("U","D")) %>%
ggplot(aes(x=Sample,y=Reads, fill = Name2)) + geom_col(position = "stack") + coord_flip() + scale_fill_d3() + facet_wrap(~ GRUPO,scales = "free")
Representación de las 15 familias mas abundantes
tabla2 %>%
filter(grepl("Bacteria",Fullname)) %>%
filter(grepl("F",TaxRank)) %>%
group_by(NCBI) %>%
filter(!is.na(GRUPO)) %>%
summarise(Sum = sum(Prop)) %>%
arrange(desc(Sum)) %>%
mutate(R = row_number()) %>%
inner_join(tabla2) %>%
filter(R <15) %>%
ggplot(aes(x=Sample,y=Prop, fill=Name2)) +
geom_col(position = "fill") +
scale_fill_d3(palette = "category20") + facet_wrap(.~ GRUPO, scales = "free")+ theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))
## Joining, by = "NCBI"
Representación de las 15 clases mas abundantes
tabla2 %>%
filter(grepl("Bacteria",Fullname)) %>%
filter(grepl("C",TaxRank)) %>%
group_by(NCBI) %>%
filter(!is.na(GRUPO)) %>%
summarise(Sum = sum(Prop)) %>%
arrange(desc(Sum)) %>%
mutate(R = row_number()) %>%
inner_join(tabla2) %>%
filter(R <15) %>%
ggplot(aes(x=Sample,y=Prop, fill=Name2)) +
geom_col(position = "fill") +
scale_fill_d3(palette = "category20") + facet_wrap(.~ GRUPO, scales = "free")+ theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))
## Joining, by = "NCBI"
Representación de los 15 generos mas abundantes
tabla2 %>%
filter(grepl("Bacteria",Fullname)) %>%
filter(grepl("G",TaxRank)) %>%
group_by(NCBI) %>%
filter(!is.na(GRUPO)) %>%
summarise(Sum = sum(Prop)) %>%
arrange(desc(Sum)) %>%
mutate(R = row_number()) %>%
inner_join(tabla2) %>%
filter(R <15) %>%
ggplot(aes(x=Sample,y=Prop, fill=Name2)) +
geom_col(position = "fill") +
scale_fill_d3(palette = "category20") + facet_wrap(.~ GRUPO, scales = "free")+ theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))
## Joining, by = "NCBI"
Calculo de la alpha-Diversidad (Shannon y Simpson). Para aumentar la fiabilidad del test los datos fueron rarificados para eliminar las diferencias de diversidad ocasionadas por las distintas profundidades de secuenciación.
dn <- tabla2 %>%
filter(grepl("Bacteria",Fullname)) %>%
filter(grepl("S",TaxRank)) %>%
select(NCBI,Reads,Sample) %>%
distinct() %>%
pivot_wider(names_from = Sample, values_from = Reads, values_fill = 0) %>%
column_to_rownames("NCBI")
dn <- rrarefy(dn, sample = min(colSums(dn)))
shannon <- diversity(t(dn)) %>% as.data.frame()
simpson <- diversity(t(dn), index = "simpson") %>% as.data.frame()
Análisis estadisticos de la Alpha Diversidad usando el test estadisto robust t-test
shannon %>%
rename(estimate = 1) %>%
rownames_to_column("Sample") %>%
inner_join(tabla2) %>% select(GRUPO, Sample, estimate) %>% distinct() %>%
filter(!is.na(GRUPO)) %>% ggbetweenstats(x = GRUPO, y = estimate, title = "Shannon Alpha Diversity", type = "robust") + theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))
## Joining, by = "Sample"
simpson %>%
rename(estimate = 1) %>%
rownames_to_column("Sample") %>%
inner_join(tabla2) %>% select(GRUPO, Sample, estimate) %>% distinct() %>%
filter(!is.na(GRUPO)) %>% ggbetweenstats(x = GRUPO, y = estimate, title = "Simpson Alpha Diversity", type = "robust") + theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))
## Joining, by = "Sample"
Analsis de la beta diversidad. Usamos UMAP para la reducción dimensional sobre la distancia bray-curtis y el test ADONIS para el analisis estadistico de las agrupaciones por GRUPO.
dn <- tabla2 %>%
filter(grepl("Bacteria",Fullname)) %>%
select(NCBI,Reads,Sample) %>%
distinct() %>%
pivot_wider(names_from = Sample, values_from = Reads, values_fill = 0) %>%
column_to_rownames("NCBI")
dn <- t(dn)
bc <- avgdist(dn, sample = min(colSums(dn))) %>% as.matrix()
um <- umap(bc) %>% as.data.frame() %>% rename(X =1, Y =2)
rownames(um) <- colnames(bc)
um %>%
rownames_to_column("Sample") %>%
left_join(metadata) %>% filter(!is.na(GRUPO)) %>%
ggplot(aes(x=X, y=Y, fill = GRUPO)) + geom_label(aes(label = Sample)) + scale_fill_d3()
## Joining, by = "Sample"
data <- metadata %>% inner_join(tabla2 %>% filter(NCBI ==2) %>% select(Sample,Reads)) %>% column_to_rownames("Sample")
## Joining, by = "Sample"
## Insertar Edad
ado <- adonis2(dn ~ tto_parasiticida_correcto+bristol+GRUPO+sexo+Reads,data)
ado
beta_disper_grupo <- betadisper(bc %>% as.dist(),group = metadata$GRUPO)
beta_disper_grupo
##
## Homogeneity of multivariate dispersions
##
## Call: betadisper(d = bc %>% as.dist(), group = metadata$GRUPO)
##
## No. of Positive Eigenvalues: 79
## No. of Negative Eigenvalues: 0
##
## Average distance to median:
## Control no infectado
## 0.6603
## Infección crónica con afectación cardiaca
## 0.6550
## Infección crónica con afectación digestiva
## 0.6510
## Infección crónica indeterminada
## 0.6623
##
## Eigenvalues for PCoA axes:
## (Showing 8 of 79 eigenvalues)
## PCoA1 PCoA2 PCoA3 PCoA4 PCoA5 PCoA6 PCoA7 PCoA8
## 0.8480 0.7990 0.7657 0.7508 0.7334 0.7194 0.7083 0.6951
plot(beta_disper_grupo)
beta_disper_tto <- betadisper(bc %>% as.dist(),group = metadata$tto_parasiticida_correcto)
beta_disper_tto
##
## Homogeneity of multivariate dispersions
##
## Call: betadisper(d = bc %>% as.dist(), group =
## metadata$tto_parasiticida_correcto)
##
## No. of Positive Eigenvalues: 79
## No. of Negative Eigenvalues: 0
##
## Average distance to median:
## Control No Si
## 0.6604 0.6638 0.6635
##
## Eigenvalues for PCoA axes:
## (Showing 8 of 79 eigenvalues)
## PCoA1 PCoA2 PCoA3 PCoA4 PCoA5 PCoA6 PCoA7 PCoA8
## 0.8480 0.7990 0.7657 0.7508 0.7334 0.7194 0.7083 0.6951
plot(beta_disper_tto)
### heat-map trees
Para el análisis diferencial hemos usado el test DESeq2 sobre los datos normalizados por rarefaction. No siempre es necesario estre proceso pero debido a la diferencia de profundidad de algunas muestras hemos preferido un enfoque mas conservador.
El test DESeq2 usa: “Differential abundance analysis based on the Negative Binomial (a.k.a. Gamma-Poisson) distribution”
preparación de los datos en los formatos necesarios.
table_otu <- tabla2 %>%
dplyr::filter(grepl("Bacteria",Fullname)) %>%
dplyr::select(NCBI,Reads,Sample) %>%
distinct() %>%
pivot_wider(names_from = Sample, values_from = Reads, values_fill = 0) %>%
column_to_rownames("NCBI") %>% as.matrix()
table_otu <- rrarefy(table_otu, sample = min(colSums(table_otu)))
cdata <- metadata %>% as.data.frame() %>%
column_to_rownames("Sample") %>%
filter(!is.na(GRUPO)) %>%
mutate(GRUPO = gsub(" ","_",GRUPO))
countd <- table_otu[,rownames(cdata)]
dds <- DESeqDataSetFromMatrix(countData = countd,
colData = cdata,
design = ~ GRUPO)
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
Eliminamos aquellos taxones con una abundancia total menor de 10.
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
Realizamos el test estadistico propiamente.
dds <- DESeq(dds, fitType = "local")
## estimating size factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## -- replacing outliers and refitting for 1950 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
normalized_counts <-counts(dds, normalized=TRUE) %>% as.data.frame() %>% rownames_to_column("NCBI") %>% pivot_longer(names_to = "Sample", values_to = "NormAbundance",-NCBI)
taxtable <- tabla2 %>% select(NCBI, TaxRank,Name2) %>% distinct() %>% mutate(NCBI = as.character(NCBI))
tax_network <- tabla2 %>% select(FullNCBI) %>% distinct()
Volcano Plot con las taxones con p-value ajustado menor a 0.001
res_c_ici <- results(dds) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))
## Joining, by = "NCBI"
res_c_ici %>% ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + labs(title = "Infección crónica indeterminada vs Control no infectado")
Visualización individual de los taxones significativos.
results(dds) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
filter(sig =="Significative") %>%
filter(!is.na(GRUPO)) %>%
inner_join(normalized_counts) %>%
filter(GRUPO %in% c("Infección crónica con afectación cardiaca","Control no infectado")) %>%
ggplot(aes(x = GRUPO, y = NormAbundance, fill = GRUPO)) +
geom_violin() +
facet_grid(~Name2, scales = "free") +
scale_fill_d3() +
scale_y_log10() +
theme_light() +
labs(title = "Infección crónica indeterminada vs Control no infectado")+ theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))
## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")
Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y su taxonomia asociada.
kraken_heat_tree(results(dds) %>% as.data.frame(),tabla2,0.05)
## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout
Volcano Plot con las taxones con p-value ajustado menor a 0.001
res_c_icac <- results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_cardiaca","Control_no_infectado")) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))
## Joining, by = "NCBI"
res_c_icac %>% ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light()+
labs(title = "Infección crónica con afectación cardiaca vs Control no infectado")
Visualización individual de los taxones significativos.
results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_cardiaca","Control_no_infectado")) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
filter(sig =="Significative") %>%
filter(!is.na(GRUPO)) %>%
inner_join(normalized_counts) %>%
filter(GRUPO %in% c("Infección crónica con afectación cardiaca","Control no infectado")) %>%
ggplot(aes(x = GRUPO, y = NormAbundance, fill = GRUPO)) +
geom_violin() +
facet_grid(~Name2, scales = "free") + scale_fill_d3() + scale_y_log10() + theme_light() +
labs(title = "Infección crónica con afectación cardiaca vs Control no infectado")+
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))
## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")
Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y su taxonomia asociada.
kraken_heat_tree(results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_cardiaca","Control_no_infectado")) %>% as.data.frame(),tabla2,0.05)
## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout
Volcano Plot con las taxones con p-value ajustado menor a 0.001
res_c_icad <- results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_digestiva","Control_no_infectado")) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))
## Joining, by = "NCBI"
res_c_icad %>%
filter(!is.na(padj)) %>%
ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + labs(title = "Infección crónica con afectación digestiva vs Control no infectado" )
Visualización individual de los taxones significativos.
results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_digestiva","Control_no_infectado")) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
filter(sig =="Significative") %>%
filter(!is.na(GRUPO)) %>%
inner_join(normalized_counts) %>%
filter(GRUPO %in% c("Infección crónica con afectación digestiva","Control no infectado")) %>%
ggplot(aes(x = GRUPO, y = NormAbundance, fill = GRUPO)) +
geom_violin() +
facet_grid(~Name2, scales = "free") +
scale_fill_d3() +
scale_y_log10()+
theme_light() +
labs(title = "Infección crónica con afectación digestiva vs Control no infectado" ) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")
Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y
su taxonomia asociada.
kraken_heat_tree(results(dds, contrast = c("GRUPO","Infección_crónica_con_afectación_digestiva","Control_no_infectado")) %>% as.data.frame(),tabla2,0.05)
## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout
Heat map con todos los genes significativos de todos los contrastes anteriores.
signi <- bind_rows(res_c_icac,res_c_icad) %>% bind_rows(res_c_ici) %>% filter(sig =="Significative")
annot <- tabla2 %>% select(Sample,GRUPO,sexo) %>% distinct() %>% column_to_rownames("Sample")
tabla2 %>% as_tibble() %>%
mutate(NCBI = as.character(NCBI)) %>%
semi_join(signi, by ="NCBI") %>%
inner_join(normalized_counts %>% mutate(NCBI = NCBI)) %>%
group_by(Sample,Name2) %>%
summarise(NormAbundance = log10(max(NormAbundance))) %>%
ungroup() %>%
pivot_wider(names_from = Name2, values_from = NormAbundance,values_fill = 0) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
pheatmap::pheatmap(annotation_row = annot)
## Joining, by = c("NCBI", "Sample")
## `summarise()` has grouped output by 'Sample'. You can override using the
## `.groups` argument.
cdata <- metadata %>%
as.data.frame() %>%
column_to_rownames("Sample")
countd <- table_otu[,rownames(cdata)]
dds <- DESeqDataSetFromMatrix(countData = countd,
colData = cdata,
design = ~ tto_parasiticida_correcto)
Eliminamos aquellos taxones con una abundancia total menor de 10.
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
Realizamos el test estadistico propiamente.
dds <- DESeq(dds, fitType = "local")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 2103 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
normalized_counts <-counts(dds, normalized=TRUE) %>% as.data.frame() %>% rownames_to_column("NCBI") %>% pivot_longer(names_to = "Sample", values_to = "NormAbundance",-NCBI)
Volcano Plot con las taxones con p-value ajustado menor a 0.001
tto_si_vs_control <- results(dds) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))
## Joining, by = "NCBI"
tto_si_vs_control %>% ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + labs(title = "tto parasiticida correcto Si vs Control")
Visualización individual de los taxones significativos.
results(dds) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
filter(sig =="Significative") %>%
inner_join(normalized_counts) %>%
filter(tto_parasiticida_correcto %in% c("Si","Control")) %>%
ggplot(aes(x = tto_parasiticida_correcto, y = NormAbundance, fill = tto_parasiticida_correcto)) +
geom_violin() +
facet_grid(~Name2, scales = "free") +
scale_fill_d3() +
scale_y_log10() +
theme_light() +
labs(title = "tto parasiticida correcto Si vs Control")+
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))
## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")
Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y su taxonomia asociada.
kraken_heat_tree(results(dds) %>% as.data.frame(),tabla2,0.05)
## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout
Volcano Plot con las taxones con p-value ajustado menor a 0.001
tto_no_vs_control <- results(dds, contrast = c("tto_parasiticida_correcto","No","Control")) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))
## Joining, by = "NCBI"
tto_no_vs_control %>% ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + labs(title = "tto parasiticida correcto No vs Control")
Visualización individual de los taxones significativos.
results(dds,contrast = c("tto_parasiticida_correcto","No","Control")) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
filter(sig =="Significative") %>%
inner_join(normalized_counts) %>%
filter(tto_parasiticida_correcto %in% c("No","Control")) %>%
ggplot(aes(x = tto_parasiticida_correcto, y = NormAbundance, fill = tto_parasiticida_correcto)) +
geom_violin() +
facet_grid(~Name2, scales = "free") +
scale_fill_d3() +
scale_y_log10() +
theme_light() +
labs(title = "tto parasiticida correcto No vs Control")+
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))
## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")
Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y su taxonomia asociada.
kraken_heat_tree(results(dds,contrast = c("tto_parasiticida_correcto","No","Control")) %>% as.data.frame(),tabla2,0.05)
## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout
Volcano Plot con las taxones con p-value ajustado menor a 0.001
tto_no_vs_si <- results(dds, contrast = c("tto_parasiticida_correcto","No","Si")) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(taxtable) %>% mutate(label = ifelse(sig == "Significative",Name2,NA))
## Joining, by = "NCBI"
tto_no_vs_si %>% ggplot(aes(x= log2FoldChange, y = -log10(padj), color = sig, size = sqrt(baseMean), label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + labs(title = "tto parasiticida correcto No vs Si")
Visualización individual de los taxones significativos.
results(dds,contrast = c("tto_parasiticida_correcto","No","Si")) %>%
as.data.frame() %>%
mutate(sig = ifelse(padj < 0.001,"Significative","No-Significative")) %>%
rownames_to_column("NCBI") %>%
inner_join(tabla2 %>% mutate(NCBI = as.character(NCBI))) %>%
filter(sig =="Significative") %>%
inner_join(normalized_counts) %>%
filter(tto_parasiticida_correcto %in% c("No","Si")) %>%
ggplot(aes(x = tto_parasiticida_correcto, y = NormAbundance, fill = tto_parasiticida_correcto)) +
geom_violin() +
facet_grid(~Name2, scales = "free") +
scale_fill_d3() +
scale_y_log10() +
theme_light() +
labs(title = "tto parasiticida correcto No vs Si")+
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust=1))
## Joining, by = "NCBI"
## Joining, by = c("NCBI", "Sample")
Heat-Tree con los taxones significativos (p-values ajustado < 0.05) y su taxonomia asociada.
kraken_heat_tree(results(dds,contrast = c("tto_parasiticida_correcto","No","Si")) %>% as.data.frame(),tabla2,0.05)
## Joining, by = "NCBI"
## Joining, by = "from"
## Joining, by = "id"
## Joining, by = "id"
## Using `tree` as default layout