My goal is to find a blood-based gene expression signature that distinguishes people with latent TB from those who are healthy or have active TB. This analysis uses TB\_human\_datasets\_04\_2018 from data/TB\_human\_datasets\_04\_2018.RData.
Following the typical MetaIntegrator meta-analysis workflow, I first choose 6 discovery datasets, which are identified by their GSE numbers, and created lVsAHDiscovery in which to store these:
library(magrittr)
library(purrr)
#>
#> Attaching package: 'purrr'
#> The following object is masked from 'package:magrittr':
#>
#> set_names
library(glue)
devtools::load_all()
#> Loading tb
load(here::here("data", "TB_human_datasets_04_2018.RData"))
discoveryGSEIDs = c("GSE19491", "GSE28623", "GSE54992", "GSE37250", "GSE39939.noCultureNeg", "GSE39940")
case_classes = c("Latent TB")
control_classes = c("Active TB", "Healthy")
discoveryGSEs = lapply(TB_human_datasets_04_2018[discoveryGSEIDs], function(gse) {
filteredGSE = .filterGSE(gse, classesOfInterest=c(case_classes, control_classes))
filteredGSEWithClass = .addClassVec(filteredGSE, caseClasses=case_classes)
filteredGSEWithClass
})
# Only keep GSEs which have >= 2 cases and >= 2 controls, for effect size (Hedges' g) to be well-defined.
discoveryGSEs = discoveryGSEs %>%
keep(function(gse) sum(gse$class) > 1 && sum(gse$class) < length(gse$class)-1)lVsAHDiscovery = MetaIntegrator::runMetaAnalysis(list(originalData=discoveryGSEs))
#> Setting options('download.file.method.GEOquery'='auto')
#> Setting options('GEOquery.inmemory.gpl'=FALSE)
#> Warning: replacing previous import 'data.table::first' by 'dplyr::first'
#> when loading 'MetaIntegrator'
#> Warning: replacing previous import 'data.table::between' by
#> 'dplyr::between' when loading 'MetaIntegrator'
#> Warning: replacing previous import 'data.table::last' by 'dplyr::last' when
#> loading 'MetaIntegrator'
#> Warning: replacing previous import 'Biobase::combine' by 'dplyr::combine'
#> when loading 'MetaIntegrator'
#> Warning: replacing previous import 'dplyr::failwith' by 'plyr::failwith'
#> when loading 'MetaIntegrator'
#> Warning: replacing previous import 'dplyr::id' by 'plyr::id' when loading
#> 'MetaIntegrator'
#> Warning: replacing previous import 'dplyr::summarize' by 'plyr::summarize'
#> when loading 'MetaIntegrator'
#> Warning: replacing previous import 'dplyr::count' by 'plyr::count' when
#> loading 'MetaIntegrator'
#> Warning: replacing previous import 'dplyr::desc' by 'plyr::desc' when
#> loading 'MetaIntegrator'
#> Warning: replacing previous import 'dplyr::mutate' by 'plyr::mutate' when
#> loading 'MetaIntegrator'
#> Warning: replacing previous import 'dplyr::arrange' by 'plyr::arrange' when
#> loading 'MetaIntegrator'
#> Warning: replacing previous import 'dplyr::rename' by 'plyr::rename' when
#> loading 'MetaIntegrator'
#> Warning: replacing previous import 'dplyr::summarise' by 'plyr::summarise'
#> when loading 'MetaIntegrator'
#> Warning: replacing previous import 'plyr::rename' by 'reshape::rename' when
#> loading 'MetaIntegrator'
#> Warning: replacing previous import 'data.table::melt' by 'reshape::melt'
#> when loading 'MetaIntegrator'
#> Warning: replacing previous import 'plyr::round_any' by
#> 'reshape::round_any' when loading 'MetaIntegrator'
#> Warning: replacing previous import 'reshape::recast' by 'reshape2::recast'
#> when loading 'MetaIntegrator'
#> Warning: replacing previous import 'reshape::melt' by 'reshape2::melt' when
#> loading 'MetaIntegrator'
#> Warning: replacing previous import 'reshape::colsplit' by
#> 'reshape2::colsplit' when loading 'MetaIntegrator'
#> Warning: replacing previous import 'data.table::dcast' by 'reshape2::dcast'
#> when loading 'MetaIntegrator'
#> Found common probes in 4
#> Found common probes in 5
#> Found common probes in 6
#> Computing effect sizes...
#> Computing summary effect sizes...
#> Computing Fisher's output...validationGSEIDs = c("GSE62525", "GSEScribaDay0to7", "GSE84076", "GSE101705", "GSE73408", "GSE69581")
validationGSEs = lapply(TB_human_datasets_04_2018[validationGSEIDs], function(gse) {
filteredGSE = .filterGSE(gse, classesOfInterest=c(case_classes, control_classes))
filteredGSEWithClass = .addClassVec(filteredGSE, caseClasses=case_classes)
filteredGSEWithClass
})
validationGSEs = validationGSEs %>%
keep(function(gse) sum(gse$class) > 1 && sum(gse$class) < length(gse$class)-1)lVsAHValidation = MetaIntegrator::runMetaAnalysis(list(originalData=validationGSEs))
#> Found common probes in 3
#> Found common probes in 4
#> Computing effect sizes...
#> Computing summary effect sizes...
#> Computing Fisher's output...effectSizeThresholds = c(0.6, 0.8, 1)
geneFilters = effectSizeThresholds %>% map(~MetaIntegrator::filterGenes(lVsAHDiscovery, numberStudiesThresh = 4, FDRThresh = 0.1, effectSizeThresh = .)) %>% map(~.$filterResults[[1]]) %>% map(~.removeLOCMIRFromFilter(.)) %>% map(~MetaIntegrator::forwardSearch(lVsAHDiscovery, .))
#> Converting probes:genes for gene list for all discovery GEMs...... done.
#> next best= 793.9697
#> Adding SMARCD3 793.9697
#> next best= 34.92217
#> Adding WDR73 828.8919
#> next best= 16.21166
#> Adding GTF2H4 845.1035
#> next best= 7.28888
#> Adding FCGBP 852.3924
#> next best= 7.037914
#> Adding CTSD 859.4303
#> next best= 3.889181
#> Adding TOP3B 863.3195
#> next best= 3.621661
#> Adding POLD2 866.9412
#> next best= 1.375401
#> Adding DNAJC13 868.3166
#> next best= 1.740001
#> Adding VAMP5 870.0566
#> next best= 2.5914
#> Adding METTL9 872.648
#> next best= 1.619676
#> Adding MED6 874.2677
#> next best= 1.352877
#> Adding ITGAM 875.6205
#> next best= 2.188995
#> Adding FCGR1C 877.8095
#> next best= 1.880087
#> Adding ATP6V0A1 879.6896
#> next best= 0.8765973
#> Adding IFT20 880.5662
#> next best= 1.685723
#> Adding KCTD14 882.2519
#> next best= 2.989615
#> Adding FLJ38717 885.2415
#> next best= 1.71511
#> Adding TCF4 886.9567
#> next best= 1.484682
#> Adding LONP2 888.4413
#> next best= 0.9167106
#> Adding TPST1 889.3581
#> next best= 2.692505
#> Adding ELANE 892.0506
#> next best= 0.7166615
#> Adding NCRNA00120 892.7672
#> next best= 0.8629502
#> Adding TOP1P2 893.6302
#> next best= 0.7634589
#> Adding PARM1 894.3936
#> next best= 0.6739637
#> Adding CBX7 895.0676
#> next best= 0.674939
#> Adding ABHD6 895.7425
#> next best= 0.3149729
#> Adding C1orf93 896.0575
#> next best= 0.5273649
#> Adding VWA5A 896.5849
#> next best= 0.5315503
#> Adding GAGE2B 897.1164
#> next best= 0.1393486
#> Adding GPN1 897.2558
#> next best= 0.4369288
#> Adding RNASEH1 897.6927
#> next best= -0.2937387
#>
#> Final discovery AUCs from forward search genes:
#> GSE19491 0.941
#> GSE28623 0.879
#> GSE54992 0.933
#> GSE37250 0.945
#> GSE39939.noCultureNeg 0.984
#> GSE39940 0.968
#> mean AUC: 0.9416667
#> Converting probes:genes for gene list for all discovery GEMs...... done.
#> next best= 793.9697
#> Adding SMARCD3 793.9697
#> next best= 31.19408
#> Adding GTF2H4 825.1638
#> next best= 21.67877
#> Adding CNNM3 846.8426
#> next best= 6.036743
#> Adding CBX7 852.8793
#> next best= 5.527485
#> Adding HP 858.4068
#> next best= 7.508825
#> Adding FCGR1C 865.9156
#> next best= 2.537776
#> Adding CTSD 868.4534
#> next best= 2.608416
#> Adding TMEM167A 871.0618
#> next best= 0.7638912
#> Adding C1orf203 871.8257
#> next best= 0.6751241
#> Adding FNDC3B 872.5008
#> next best= 0.1570987
#> Adding ZNF689 872.6579
#> next best= 0.988671
#> Adding CD63 873.6466
#> next best= -0.04378817
#>
#> Final discovery AUCs from forward search genes:
#> GSE19491 0.923
#> GSE28623 0.826
#> GSE54992 0.844
#> GSE37250 0.927
#> GSE39939.noCultureNeg 0.988
#> GSE39940 0.942
#> mean AUC: 0.9083333
#> Converting probes:genes for gene list for all discovery GEMs...... done.
#> next best= 781.0584
#> Adding HP 781.0584
#> next best= 35.53863
#> Adding FCGR1C 816.597
#> next best= 12.47897
#> Adding AGTRAP 829.076
#> next best= 8.734183
#> Adding C16orf67 837.8101
#> next best= 4.464736
#> Adding MGC3020 842.2749
#> next best= 3.805493
#> Adding SIGLEC16 846.0804
#> next best= 2.32786
#> Adding RNF220 848.4082
#> next best= 0.928796
#> Adding DEFA1B 849.337
#> next best= 2.466072
#> Adding ITGAM 851.8031
#> next best= 0.4808456
#> Adding KIF22 852.2839
#> next best= 1.043822
#> Adding FAM62B 853.3278
#> next best= -0.365591
#>
#> Final discovery AUCs from forward search genes:
#> GSE19491 0.856
#> GSE28623 0.812
#> GSE54992 0.778
#> GSE37250 0.931
#> GSE39939.noCultureNeg 0.978
#> GSE39940 0.931
#> mean AUC: 0.881
names(geneFilters) = geneFilters %>% map_chr(~.describeGeneSig(., case_classes = case_classes, control_classes = control_classes, datasets = discoveryGSEIDs))library(gridExtra)
plots = names(geneFilters) %>% map(function(gfName) {
title = stringr::str_wrap(gfName, width = 80)
prc = MetaIntegrator::multiplePRCplot(lVsAHValidation, geneFilters[[gfName]], title = title, size = 12)
roc = MetaIntegrator::multipleROCPlot(lVsAHValidation, geneFilters[[gfName]], title = title, size = 12)
return(list(prc=prc, roc=roc))
}) %>% unlist(recursive = FALSE)
do.call(grid.arrange, c(plots, ncol=2))