library(PCAGenomicSignatures)
library(clusterProfiler)
library(org.Hs.eg.db)
library(msigdbr)

gsea_pvalue <- function(related, enriched, related_universe, universe) {
  deTable <- matrix(c(related, enriched-related, related_universe-related, 
                      universe - related_universe - enriched + related), 
                    nrow = 2,
                    dimnames = list(c("Keyword", "No.keyword"),
                                    c("Enriched", "Not.enriched")))
  fisher.test(deTable, alternative = "greater")
}

PCAmodel

dat_dir <- "~/data2/PCAGenomicSignatureLibrary/refinebioRseq"
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_1399/refinebioRseq_PCAmodel_hclust.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_1399_C2/refinebioRseq_PCAmodel_hclust_C2.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536/refinebioRseq_PCAmodel.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536_C2/refinebioRseq_PCAmodel.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536_allGenes/refinebioRseq_PCAmodel.rds"))
PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536_allGenes/without_rowNorm/refinebioRseq_PCAmodel.rds"))

Load E-MTAB-2452

dat_dir <- "~/data2/Genomic_Super_Signature/data"
annot.dat <- readr::read_tsv(file.path(dat_dir, "E-MTAB-2452_hugene11st_SCANfast_with_GeneSymbol.pcl")) %>% as.data.frame
rownames(annot.dat) <- annot.dat[, 2]

dataset <- as.matrix(annot.dat[, 3:ncol(annot.dat)])
rownames(dataset) <- annot.dat$GeneSymbol
dataset[1:2, 1:4]
#>      CD14_triad0058_1.CEL CD14_triad0058_2.CEL CD14_triad0058_3.CEL
#> A1BG         1.154439e-01           0.17987252           0.17031624
#> NAT2        -7.464545e-06          -0.09915562          -0.02097987
#>      CD14_triad0058_4.CEL
#> A1BG            0.1767473
#> NAT2           -0.1269907

Validate

HeatmapTable

val_all <- validate(dataset, PCAmodel)  
heatmapTable(val_all, num.out = 7)

MeSH wordcloud

heatmapTable(val_all, num.out = 5, swCutoff = 0)

validatedSig <- validatedSignatures(val_all, num.out = 5, swCutoff = 0)
validated_ind <- validatedSig[,"cl_num"]

set.seed(1)
drawWordcloud(PCAmodel, validated_ind[1])

drawWordcloud(PCAmodel, validated_ind[2])

drawWordcloud(PCAmodel, validated_ind[3])

drawWordcloud(PCAmodel, validated_ind[4])

drawWordcloud(PCAmodel, validated_ind[5])

GSEA

#> [1] "CD4"        "monocyte"   "neutrophil"
#> [1] "The number of unique pathways in the gene sets = 5529"
#> [1] "The number of pathways containing the keywords = 31"

MSigDB C2

validated_ind[1]

#> [1] "The number of enriched pathways = 909"
#> [1] "The number of enriched pathways containing keyword = 7"
#> [1] "p-value =  0.238544509476538"

validated_ind[2]

#> [1] "The number of enriched pathways = 906"
#> [1] "The number of enriched pathways containing keyword = 7"
#> [1] "p-value =  0.235897358467999"

validated_ind[3]

#> [1] "The number of enriched pathways = 1531"
#> [1] "The number of enriched pathways containing keyword = 11"
#> [1] "p-value =  0.216813728839481"

Priors from PLIER package

library(PLIER)

data(bloodCellMarkersIRISDMAP)
data(svmMarkers) 
allPaths <- combinePaths(bloodCellMarkersIRISDMAP, svmMarkers) 

source('~/data2/GenomicSuperSignature/R/gmtToMatrix.R')
term2gene <- matrixToTERM2GENE(allPaths)

#> [1] "CD4"        "monocyte"   "neutrophil"
#> [1] "The number of unique pathways in c(bloodCellMarkersIRISDMAP, svmMarkers) = 83"
#> [1] "The number of pathways containing the keywords = 14"

validated_ind[1]

#> [1] "The number of enriched pathways = 50"
#> [1] "The number of enriched pathways containing keyword = 8"
#> [1] "p-value =  0.714672311253075"

validated_ind[2]

#> [1] "The number of enriched pathways = 51"
#> [1] "The number of enriched pathways containing keyword = 8"
#> [1] "p-value =  0.748896136576048"

validated_ind[3]

#> [1] "The number of enriched pathways = 49"
#> [1] "The number of enriched pathways containing keyword = 10"
#> [1] "p-value =  0.233222747862607"

PCAmodel validation using T-MTAB-2452

August 20, 2020

Contents

PCAmodel

Load E-MTAB-2452

Validate

HeatmapTable

MeSH wordcloud

GSEA

MSigDB C2

validated_ind[1]

validated_ind[2]

validated_ind[3]

Priors from PLIER package

validated_ind[1]

validated_ind[2]

validated_ind[3]