Manual of fastANCOM

Example

We take the differential abundance analysis of 2 groups for illustration. The data was generated from Poisson distribution for each group with 30 individuals and 100 microbes. We set the first 10 microbes to be differetial by multipling a effect size vector for the abundance of one group, which is the log2FC from a norm distribution \(N(4, 1)\).

Data generation

set.seed(123)
data <- matrix(rpois(100*60, 10), 60)
b <- rnorm(10, 4)
data[31:60,1:10] <- t(t(data[31:60,1:10])*2^b)
group <- rep(c(0, 1), each=30)
rownames(data) <- paste0('sample', 1:60)
colnames(data) <- paste0('microbe', 1:100)
names(b) <- paste0('microbe', 1:10)

# take a look of the log2FC
kable_classic(kbl(t(round(b,4)),align= "l",caption='log2FC for the first 10 microbes'),
              html_font = "Cambria")

log2FC for the first 10 microbes
microbe1	microbe2	microbe3	microbe4	microbe5	microbe6	microbe7	microbe8	microbe9	microbe10
3.7867	5.1053	4.5898	3.3323	5.6418	3.1743	2.7887	2.5838	4.2968	3.4753


# take a look of the data
kable_classic(kbl(data[1:10, 1:6], align = "l",  caption='Data at a glance'), 
               full_width = T, html_font = "Cambria")

Data at a glance
	microbe1	microbe2	microbe3	microbe4	microbe5	microbe6
sample1	8	11	10	9	10	8
sample2	9	12	8	10	11	7
sample3	14	9	10	10	10	8
sample4	10	9	17	13	4	10
sample5	10	7	16	8	7	7
sample6	15	7	5	6	9	10
sample7	11	11	6	10	10	11
sample8	5	9	11	18	10	9
sample9	4	7	5	10	10	20
sample10	13	10	12	9	15	12

Model fitting

Here other parameters are set as default.

fit <- fastANCOM(Y=data, x=group)
final_fit <- fit$results$final
kable_classic(kbl(summary(fit), caption = 'Results summary'), 
              full_width = T, html_font = "Cambria")

Results summary
	Length	Class	Mode
global	0	-none-	NULL
results	3	-none-	list

kable_classic(kbl(head(final_fit, 10), align = "l", 
        caption='Results of fastANCOM'),  full_width = T, html_font = "Cambria")

Results of fastANCOM
	log2FC	log2FC.SD	Reject.number	REJECT
microbe1	3.911488	0.1406454	99	TRUE
microbe2	5.066569	0.1080591	99	TRUE
microbe3	4.605962	0.1536873	98	TRUE
microbe4	3.057501	0.1459191	97	TRUE
microbe5	5.643832	0.1470910	99	TRUE
microbe6	3.049002	0.1445558	97	TRUE
microbe7	2.847731	0.1346457	97	TRUE
microbe8	2.510676	0.1128027	99	TRUE
microbe9	4.322188	0.1337157	98	TRUE
microbe10	3.456564	0.1125187	99	TRUE

Take a look of the computation time with 100 replicates, fastANCOM thousands of times faster than original ANCOM

system.time(replicate(100, fit <- fastANCOM(Y=data, x=group, effect = T)))
#>    user  system elapsed 
#>   0.402   0.052   0.466
system.time(replicate(100, fit <- fastANCOM(Y=data, x=group, effect = F)))
#>    user  system elapsed 
#>   0.276   0.039   0.316
system.time(replicate(1, fit <- ANCOM(Y=data, x=group, tfun = wilcox.test))) 
#>    user  system elapsed 
#>   7.539   0.055   7.646
system.time(replicate(1, fit <- ANCOM(Y=data, x=group, tfun = t.test))) 
#>    user  system elapsed 
#>   5.246   0.023   5.283
system.time(replicate(100, fit <- ANCOM(Y=data, x=group, tfun = 't2'))) 
#>    user  system elapsed 
#>   5.587   2.174   7.764

Results with plot

We present plots of reject number of each microbe, valcano plot, and the log2 fold changes estimation compared to the true settings.

library(ggplot2)
library(ggpubr)
wp <- ggplot(data=final_fit, aes(x=1:100,y=Reject.number, color=REJECT, shape=REJECT)) + 
  geom_bar(aes(y=Reject.number), stat = "identity", width = 0.005, color='lightgrey') +
  # geom_vline(xintercept = 1:100, linetype='dashed', size=0.1) +
  geom_point(size=2) + theme_linedraw() + 
  labs(x='Microbe',y='W', title='Reject number') +
  scale_colour_brewer(palette = "Set1", direction=-1) + 
  theme(legend.position = c(0.85, 0.78), 
        legend.background = element_rect(fill = "white", colour = "black"))
wp


vp <-ggplot(data=final_fit,aes(x=log2FC,y=-log10(log2FC.pval), color=REJECT, shape=REJECT)) +
  geom_hline(yintercept=-log10(0.05/nrow(final_fit)),
             linetype="dashed",color="black",size=0.5)+ 
  geom_point(size=2) + theme_linedraw() + labs(y='log2(p-value)', title='Volcano plot') +
  theme(legend.position='none') +
  scale_colour_brewer(palette = "Set1",direction=-1) 
vp


tmpb <- data.frame(Microbe=factor(1:10),log2FC=c(b, final_fit[final_fit$REJECT, 1]), 
                   se=c(rep(0, 10), final_fit[final_fit$REJECT, 2]),
                   Type=c(rep('Truth', 10), rep('Estimation', 10)))
ep <- ggplot(data=tmpb, aes(x=Microbe, y=log2FC, color=Type, shape=Type))  +
  geom_point(size=2, position =  position_dodge(0.2) ) + 
  geom_errorbar(aes(ymin=log2FC-2*se, ymax=log2FC+2*se),
                width=0.2, position =  position_dodge(0.2)) +
  theme_linedraw() + labs(title='Log2FC estimation') +
  scale_colour_brewer(palette = "Set1", direction=-1) +
   theme(legend.position = c(0.7, 0.8), 
        legend.background = element_rect(fill = "white", colour = "black"))
ep

sessionInfo()
#> R version 4.0.3 (2020-10-10)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Mojave 10.14.6
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] zh_CN.UTF-8/zh_CN.UTF-8/zh_CN.UTF-8/C/zh_CN.UTF-8/zh_CN.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] ggpubr_0.4.0     ggplot2_3.3.2    fastANCOM_0.0.4  kableExtra_1.3.1
#> 
#> loaded via a namespace (and not attached):
#>  [1] tidyselect_1.1.0   xfun_0.25          bslib_0.2.5.1      purrr_0.3.4       
#>  [5] haven_2.3.1        lattice_0.20-41    carData_3.0-4      colorspace_1.4-1  
#>  [9] vctrs_0.3.4        generics_0.1.0     htmltools_0.5.1.1  viridisLite_0.3.0 
#> [13] yaml_2.2.1         rlang_0.4.11       jquerylib_0.1.4    pillar_1.4.6      
#> [17] foreign_0.8-80     glue_1.4.2         withr_2.3.0        RColorBrewer_1.1-2
#> [21] readxl_1.3.1       lifecycle_0.2.0    stringr_1.4.0      cellranger_1.1.0  
#> [25] munsell_0.5.0      ggsignif_0.6.0     gtable_0.3.0       zip_2.1.1         
#> [29] rvest_0.3.6        evaluate_0.14      labeling_0.4.2     forcats_0.5.0     
#> [33] knitr_1.33         rio_0.5.16         curl_4.3           highr_0.8         
#> [37] Rcpp_1.0.5         broom_0.7.2        scales_1.1.1       backports_1.1.10  
#> [41] webshot_0.5.2      jsonlite_1.7.1     abind_1.4-5        farver_2.0.3      
#> [45] hms_0.5.3          digest_0.6.27      openxlsx_4.2.3     stringi_1.5.3     
#> [49] rstatix_0.6.0      dplyr_1.0.2        grid_4.0.3         tools_4.0.3       
#> [53] magrittr_2.0.1     sass_0.4.0         tibble_3.0.4       crayon_1.3.4      
#> [57] tidyr_1.1.2        car_3.0-10         pkgconfig_2.0.3    ellipsis_0.3.1    
#> [61] data.table_1.13.2  xml2_1.3.2         rmarkdown_2.10     httr_1.4.2        
#> [65] rstudioapi_0.11    R6_2.5.0           nlme_3.1-149       compiler_4.0.3

Manual of fastANCOM

Install and load fastANCOM

Example

Data generation

Model fitting

Results with plot