Contents

1 References

1.0.1 Publication

Oh S, Geistlinger L, Ramos M, Blankenberg D, van den Beek M, Taroni JN, Carey VJ, Waldron L, Davis S. GenomicSuperSignature facilitates interpretation of RNA-seq experiments through robust, efficient comparison to public databases. Nature Communications 2022;13: 3695. doi: 10.1038/s41467-022-31411-3

1.0.2 Bioconductor Package

GenomicSuperSignature

1.0.3 Use cases and reproducible code

GenomicSuperSignaturePaper

2 Setup

2.1 Load packages

## Install GenomicSuperSignature from the shbrief GitHub repository.
## NOTE(review): this call is unconditional, so re-running the script
## reinstalls the package every time -- confirm that is intended.
BiocManager::install("shbrief/GenomicSuperSignature")

## Attach the packages used below without printing their startup messages.
suppressPackageStartupMessages({
    library(GenomicSuperSignature)
    ## NOTE(review): bcellViper is not referenced in the sections visible
    ## here; presumably it is used by a section that is not shown.
    library(bcellViper)
    ## dplyr supplies the %>% pipe used in later chunks.
    library(dplyr)
    ## AnVIL supplies gsutil_pipe() for reading from gs:// buckets.
    library(AnVIL)
})

2.2 Load RAVmodels

## getModel
## Retrieve the "PLIERpriors" model into RAVmodel; system.time() reports how
## long the call takes (the echoed output shows that it triggers a download).
system.time(RAVmodel <- getModel("PLIERpriors"))   
## [1] "downloading"
##    user  system elapsed 
##   6.824   1.919  16.506
## Retrieve the "C2" model into RAVmodel_C2, timed the same way.
system.time(RAVmodel_C2 <- getModel("C2"))   
## [1] "downloading"
##    user  system elapsed 
##   6.878   1.849  18.133

4 [Slide 17] TCGA-BRCA

The TCGA_validationDatasets.rda file was prepared using this script.

## The data file is stored in a Google Cloud bucket and holds
## log-transformed raw data.
dir <- "gs://genomic_super_signature"
fpath <- file.path(dir, "TCGA_validationDatasets.rda")

## Load the data
## gsutil_pipe() (AnVIL package) streams the .rda file through a pipe.
## load() expects a binary-mode connection, so open the pipe as "rb" (the
## same mode used for the .pcl file later in this document). load() does not
## close a connection that was already open when passed in, so close it
## explicitly instead of leaking it.
con <- gsutil_pipe(fpath, open = "rb")
load(con)
close(con)

## Panel B
## Take the "BRCA" element of the loaded TCGA_validationDatasets object.
brca <- TCGA_validationDatasets[["BRCA"]]
## Validate it against RAVmodel_C2 (timed), keeping the result in val_brca.
system.time(val_brca <- validate(brca, RAVmodel_C2))
##    user  system elapsed 
##   1.029   0.158   1.195
## Draw the heatmap table for that validation result.
heatmapTable(val_brca, RAVmodel_C2)


## Panel C
## Word cloud for RAV 221 of RAVmodel (the PLIERpriors model loaded above).
drawWordcloud(RAVmodel, 221)


## Panel D
## List the studies in RAV 221. With studyTitle = TRUE the printed result
## carries a `title` column next to study name, PC and variance explained.
findStudiesInCluster(RAVmodel, 221, studyTitle = TRUE)
##   studyName PC Variance explained (%)
## 1 ERP016798  2                   8.25
## 2 SRP023262  9                   1.07
## 3 SRP111343  3                   4.46
##                                                                                                   title
## 1                                             Whole transcriptome profiling of 63 breast cancer tumours
## 2 A shared transcriptional program in early breast neoplasias despite genetic and clinical distinctions
## 3                             RNAseq analysis of chemotherapy and radiation therapy-naïve breast tumors

## Panel E
## Enriched pathways for RAV 221. include_nes = TRUE adds the NES column seen
## in the output; the result is coerced to a plain data.frame for printing.
subsetEnrichedPathways(RAVmodel, 221, include_nes = TRUE) %>% as.data.frame
##                                 RAV221.Description RAV221.NES
## Up_1  REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM  -2.272304
## Up_2               REACTOME_ADAPTIVE_IMMUNE_SYSTEM  -2.287344
## Up_3              IRIS_DendriticCell-LPSstimulated  -2.304916
## Up_4               REACTOME_MITOTIC_G1_G1_S_PHASES  -2.368176
## Up_5                            IRIS_Monocyte-Day7  -2.407126
## Up_6                REACTOME_MITOTIC_M_M_G1_PHASES  -2.450146
## Up_7                      REACTOME_DNA_REPLICATION  -2.491313
## Up_8                           REACTOME_CELL_CYCLE  -2.499352
## Up_9                   REACTOME_CELL_CYCLE_MITOTIC  -2.546613
## Up_10                              KEGG_CELL_CYCLE  -2.548161
# annotateRAV(RAVmodel, 221, n = 10)

5 [Slide 18] Annotated PCA plot

E-MTAB-2452 dataset

## The data file is stored in a Google Cloud bucket; gsutil_pipe() (AnVIL
## package) streams it through a binary-mode pipe connection.
dir <- "gs://genomic_super_signature"
fpath <- file.path(dir, "E-MTAB-2452_hugene11st_SCANfast_with_GeneSymbol.pcl")
x <- gsutil_pipe(fpath, open = "rb")

## Load the data
annot.dat <- readr::read_tsv(x, show_col_types = FALSE) %>% as.data.frame
## The pipe was opened by this script, so it is still open after the read;
## close it explicitly instead of leaking the connection.
close(x)
## NOTE(review): column 2 is presumably the GeneSymbol column used again
## below -- confirm against the file header.
rownames(annot.dat) <- annot.dat[, 2]

## Keep columns 3 onward as the matrix and label its rows with the gene
## symbols. The preview below shows these columns are the per-sample .CEL
## columns.
dataset <- as.matrix(annot.dat[, 3:ncol(annot.dat)])
rownames(dataset) <- annot.dat$GeneSymbol
dataset[1:3, 1:3]
##      CD14_triad0058_1.CEL CD14_triad0058_2.CEL CD14_triad0058_3.CEL
## A1BG         1.154439e-01           0.17987252           0.17031624
## NAT2        -7.464545e-06          -0.09915562          -0.02097987
## ADA          5.338218e-01           0.67212096           0.76375038
## Validate the expression matrix against RAVmodel (timed); the result is
## kept in val_all and reused by the annotation and plotting calls below.
system.time(val_all <- validate(dataset, RAVmodel))
##    user  system elapsed 
##   0.842   0.052   0.896
## Annotate PC2 only. With simplify = FALSE the printed result is a named
## list whose table includes the NES, pvalue and qvalues columns.
annotatePC(2, val_all, RAVmodel, simplify = FALSE)
## $`PC2-RAV1552`
##                  Description      NES pvalue      qvalues
## 1         IRIS_Monocyte-Day0 2.586697  1e-10 2.680702e-09
## 2 IRIS_DendriticCell-Control 2.433219  1e-10 2.680702e-09
## 3                 DMAP_MONO2 2.376574  1e-10 2.680702e-09
## 4         IRIS_Monocyte-Day7 2.366122  1e-10 2.680702e-09
## 5              SVM Monocytes 2.314221  1e-10 2.680702e-09
## Annotate PC1 to PC3 side by side (one column per PC/RAV pair).
## NOTE(review): scoreCutoff = 0 presumably disables filtering on the
## validation score -- confirm against the annotatePC documentation.
annotatePC(1:3, val_all, RAVmodel, scoreCutoff = 0)
##                        PC1.RAV23                PC2.RAV1552
## 1                SVM T cells CD8         IRIS_Monocyte-Day0
## 2          SVM T cells CD4 naive IRIS_DendriticCell-Control
## 3  SVM T cells follicular helper                 DMAP_MONO2
## 4 SVM T cells regulatory (Tregs)         IRIS_Monocyte-Day7
## 5        SVM T cells gamma delta              SVM Monocytes
##                                        PC3.RAV1387
## 1                  MIPS_55S_RIBOSOME_MITOCHONDRIAL
## 2 REACTOME_RESPIRATORY_ELECTRON_TRANSPORT_ATP_S...
## 3         MIPS_39S_RIBOSOMAL_SUBUNIT_MITOCHONDRIAL
## 4 REACTOME_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_T...
## 5          REACTOME_RESPIRATORY_ELECTRON_TRANSPORT

Label each sample with its known cell type.

## Derive each sample's cell-type label from its column name: the prefix
## before the first underscore (e.g. "CD14" in "CD14_triad0058_1.CEL").
samplePrefix <- sub("_.*$", "", colnames(dataset))

## Map prefixes to display labels by exact match. Unanchored gsub() calls
## would also rewrite any prefix that merely contains "CD4", "CD14" or "CD16"
## (e.g. "CD40" would become "CD4,T cell0"); unknown prefixes are kept as-is.
knownLabels <- c(CD4 = "CD4,T cell",
                 CD14 = "CD14,monocyte",
                 CD16 = "CD16,neutrophil")
isKnown <- samplePrefix %in% names(knownLabels)
cellType <- samplePrefix
cellType[isKnown] <- knownLabels[samplePrefix[isKnown]]

## Name the labels by sample so they can be matched to dataset columns.
names(cellType) <- colnames(dataset)
## Annotated PCA plot of `dataset`, using RAVmodel and the validation result
## val_all. c(2,3) selects the two PCs to plot (presumably PC2 and PC3).
## Points are colored by the per-sample cellType labels, with the legend
## titled "Cell Type".
## NOTE(review): scoreCutoff = 0.3 presumably filters annotations by
## validation score -- confirm against the plotAnnotatedPCA documentation.
plotAnnotatedPCA(dataset, RAVmodel, c(2,3), val_all, 
                 scoreCutoff = 0.3, 
                 color_by = cellType, 
                 color_lab = "Cell Type")

6 Session Info

## Print the R version, platform and package versions of this session.
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.2.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] AnVIL_1.9.5                 dplyr_1.0.9                
##  [3] bcellViper_1.30.0           GenomicSuperSignature_1.5.3
##  [5] SummarizedExperiment_1.24.0 Biobase_2.54.0             
##  [7] GenomicRanges_1.46.1        GenomeInfoDb_1.30.1        
##  [9] IRanges_2.28.0              S4Vectors_0.32.4           
## [11] BiocGenerics_0.40.0         MatrixGenerics_1.6.0       
## [13] matrixStats_0.62.0          BiocStyle_2.22.0           
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-3       ggsignif_0.6.3         rjson_0.2.21          
##   [4] ellipsis_0.3.2         flextable_0.7.2        circlize_0.4.15       
##   [7] futile.logger_1.4.3    XVector_0.34.0         base64enc_0.1-3       
##  [10] GlobalOptions_0.1.2    clue_0.3-61            rstudioapi_0.13       
##  [13] farver_2.1.1           ggpubr_0.4.0           remotes_2.4.2         
##  [16] bit64_4.0.5            fansi_1.0.3            xml2_1.3.3            
##  [19] codetools_0.2-18       doParallel_1.0.17      cachem_1.0.6          
##  [22] knitr_1.39             jsonlite_1.8.0         broom_1.0.0           
##  [25] cluster_2.1.3          dbplyr_2.2.1           png_0.1-7             
##  [28] BiocManager_1.30.18    readr_2.1.2            compiler_4.1.2        
##  [31] httr_1.4.3             backports_1.4.1        assertthat_0.2.1      
##  [34] Matrix_1.4-1           fastmap_1.1.0          cli_3.3.0             
##  [37] formatR_1.12           htmltools_0.5.3        tools_4.1.2           
##  [40] gtable_0.3.0           glue_1.6.2             GenomeInfoDbData_1.2.7
##  [43] rappdirs_0.3.3         Rcpp_1.0.9             rapiclient_0.1.3      
##  [46] carData_3.0-5          jquerylib_0.1.4        vctrs_0.4.1           
##  [49] iterators_1.0.14       xfun_0.31              stringr_1.4.0         
##  [52] ps_1.7.1               lifecycle_1.0.1        irlba_2.3.5           
##  [55] rstatix_0.7.0          zlibbioc_1.40.0        scales_1.2.0          
##  [58] vroom_1.5.7            hms_1.1.1              parallel_4.1.2        
##  [61] lambda.r_1.2.4         RColorBrewer_1.1-3     ComplexHeatmap_2.10.0 
##  [64] yaml_2.3.5             curl_4.3.2             memoise_2.0.1         
##  [67] ggplot2_3.3.6          gdtools_0.2.4          sass_0.4.2            
##  [70] stringi_1.7.8          RSQLite_2.2.15         highr_0.9             
##  [73] foreach_1.5.2          filelock_1.0.2         zip_2.2.0             
##  [76] shape_1.4.6            systemfonts_1.0.4      rlang_1.0.4           
##  [79] pkgconfig_2.0.3        bitops_1.0-7           evaluate_0.15         
##  [82] lattice_0.20-45        purrr_0.3.4            labeling_0.4.2        
##  [85] cowplot_1.1.1          processx_3.7.0         bit_4.0.4             
##  [88] tidyselect_1.1.2       magrittr_2.0.3         bookdown_0.27         
##  [91] R6_2.5.1               magick_2.7.3           generics_0.1.3        
##  [94] DelayedArray_0.20.0    DBI_1.1.3              pillar_1.8.0          
##  [97] abind_1.4-5            RCurl_1.98-1.7         tibble_3.1.8          
## [100] crayon_1.5.1           car_3.1-0              futile.options_1.0.1  
## [103] uuid_1.1-0             wordcloud_2.6          utf8_1.2.2            
## [106] BiocFileCache_2.2.1    officer_0.4.3          tzdb_0.3.0            
## [109] rmarkdown_2.14         GetoptLong_1.0.5       grid_4.1.2            
## [112] data.table_1.14.2      callr_3.7.1            blob_1.2.3            
## [115] webshot_0.5.3          digest_0.6.29          tidyr_1.2.0           
## [118] munsell_0.5.0          bslib_0.4.0