Contents

library(PCAGenomicSignatures)
library(clusterProfiler)
library(org.Hs.eg.db)
library(msigdbr)
gsea_pvalue <- function(related, enriched, related_universe, universe) {
  deTable <- matrix(c(related, enriched-related, related_universe-related, 
                      universe - related_universe - enriched + related), 
                    nrow = 2,
                    dimnames = list(c("Keyword", "No.keyword"),
                                    c("Enriched", "Not.enriched")))
  fisher.test(deTable, alternative = "greater")
}

PCAmodel

dat_dir <- "~/data2/PCAGenomicSignatureLibrary/refinebioRseq"
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_1399/refinebioRseq_PCAmodel_hclust.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_1399_C2/refinebioRseq_PCAmodel_hclust_C2.rds"))
PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536/refinebioRseq_PCAmodel.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536_C2/refinebioRseq_PCAmodel.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536_allGenes/refinebioRseq_PCAmodel.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536_allGenes/without_rowNorm/refinebioRseq_PCAmodel.rds"))

Load E-MTAB-2452

dat_dir <- "~/data2/Genomic_Super_Signature/data"
annot.dat <- readr::read_tsv(file.path(dat_dir, "E-MTAB-2452_hugene11st_SCANfast_with_GeneSymbol.pcl")) %>% as.data.frame
rownames(annot.dat) <- annot.dat[, 2]

dataset <- as.matrix(annot.dat[, 3:ncol(annot.dat)])
rownames(dataset) <- annot.dat$GeneSymbol
dataset[1:2, 1:4]
#>      CD14_triad0058_1.CEL CD14_triad0058_2.CEL CD14_triad0058_3.CEL
#> A1BG         1.154439e-01           0.17987252           0.17031624
#> NAT2        -7.464545e-06          -0.09915562          -0.02097987
#>      CD14_triad0058_4.CEL
#> A1BG            0.1767473
#> NAT2           -0.1269907

Validate

HeatmapTable

val_all <- validate(dataset, PCAmodel)  
heatmapTable(val_all, num.out = 7)

MeSH wordcloud

heatmapTable(val_all, num.out = 5, swCutoff = 0)

validatedSig <- validatedSignatures(val_all, num.out = 5, swCutoff = 0)
validated_ind <- validatedSig[,"cl_num"]

set.seed(1)
drawWordcloud(PCAmodel, validated_ind[1])

drawWordcloud(PCAmodel, validated_ind[2])

drawWordcloud(PCAmodel, validated_ind[3])

drawWordcloud(PCAmodel, validated_ind[4])

drawWordcloud(PCAmodel, validated_ind[5])

GSEA

#> [1] "CD4"        "monocyte"   "neutrophil"
#> [1] "The number of unique pathways in the gene sets = 5529"
#> [1] "The number of pathways containing the keywords = 31"

MSigDB C2

validated_ind[2]

#> [1] "The number of enriched pathways = 458"
#> [1] "The number of enriched pathways containing keyword = 7"
#> [1] "p-value =  0.0116749611106464"

validated_ind[4]

#> [1] "The number of enriched pathways = 843"
#> [1] "The number of enriched pathways containing keyword = 9"
#> [1] "p-value =  0.0369727491835822"

validated_ind[5]

#> [1] "The number of enriched pathways = 303"
#> [1] "The number of enriched pathways containing keyword = 5"
#> [1] "p-value =  0.0251109269340919"

Priors from PLIER package

library(PLIER)

data(bloodCellMarkersIRISDMAP)
data(svmMarkers) 
allPaths <- combinePaths(bloodCellMarkersIRISDMAP, svmMarkers) 

source('~/data2/GenomicSuperSignature/R/gmtToMatrix.R')
term2gene <- matrixToTERM2GENE(allPaths)
#> [1] "CD4"        "monocyte"   "neutrophil"
#> [1] "The number of unique pathways in c(bloodCellMarkersIRISDMAP, svmMarkers) = 83"
#> [1] "The number of pathways containing the keywords = 14"

validated_ind[2]

#> [1] "The number of enriched pathways = 57"
#> [1] "The number of enriched pathways containing keyword = 14"
#> [1] "p-value =  0.0029093718501906"

validated_ind[4]

#> [1] "The number of enriched pathways = 58"
#> [1] "The number of enriched pathways containing keyword = 14"
#> [1] "p-value =  0.00383508107525125"

validated_ind[5]

#> [1] "The number of enriched pathways = 52"
#> [1] "The number of enriched pathways containing keyword = 12"
#> [1] "p-value =  0.0443972158960339"