Contents
library(PCAGenomicSignatures)
library(clusterProfiler)
library(org.Hs.eg.db)
library(msigdbr)
gsea_pvalue <- function(related, enriched, related_universe, universe) {
deTable <- matrix(c(related, enriched-related, related_universe-related,
universe - related_universe - enriched + related),
nrow = 2,
dimnames = list(c("Keyword", "No.keyword"),
c("Enriched", "Not.enriched")))
fisher.test(deTable, alternative = "greater")
}
PCAmodel
dat_dir <- "~/data2/PCAGenomicSignatureLibrary/refinebioRseq"
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_1399/refinebioRseq_PCAmodel_hclust.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_1399_C2/refinebioRseq_PCAmodel_hclust_C2.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536/refinebioRseq_PCAmodel.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536_C2/refinebioRseq_PCAmodel.rds"))
# PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536_allGenes/refinebioRseq_PCAmodel.rds"))
PCAmodel <- readRDS(file.path(dat_dir, "PCAmodel_536_allGenes/without_rowNorm/refinebioRseq_PCAmodel.rds"))
Load E-MTAB-2452
dat_dir <- "~/data2/Genomic_Super_Signature/data"
annot.dat <- readr::read_tsv(file.path(dat_dir, "E-MTAB-2452_hugene11st_SCANfast_with_GeneSymbol.pcl")) %>% as.data.frame
rownames(annot.dat) <- annot.dat[, 2]
dataset <- as.matrix(annot.dat[, 3:ncol(annot.dat)])
rownames(dataset) <- annot.dat$GeneSymbol
dataset[1:2, 1:4]
#> CD14_triad0058_1.CEL CD14_triad0058_2.CEL CD14_triad0058_3.CEL
#> A1BG 1.154439e-01 0.17987252 0.17031624
#> NAT2 -7.464545e-06 -0.09915562 -0.02097987
#> CD14_triad0058_4.CEL
#> A1BG 0.1767473
#> NAT2 -0.1269907
Validate
HeatmapTable
val_all <- validate(dataset, PCAmodel)
heatmapTable(val_all, num.out = 7)

MeSH wordcloud
heatmapTable(val_all, num.out = 5, swCutoff = 0)

validatedSig <- validatedSignatures(val_all, num.out = 5, swCutoff = 0)
validated_ind <- validatedSig[,"cl_num"]
set.seed(1)
drawWordcloud(PCAmodel, validated_ind[1])

drawWordcloud(PCAmodel, validated_ind[2])

drawWordcloud(PCAmodel, validated_ind[3])

drawWordcloud(PCAmodel, validated_ind[4])

drawWordcloud(PCAmodel, validated_ind[5])

GSEA
#> [1] "CD4" "monocyte" "neutrophil"
#> [1] "The number of unique pathways in the gene sets = 5529"
#> [1] "The number of pathways containing the keywords = 31"
MSigDB C2
validated_ind[1]
#> [1] "The number of enriched pathways = 909"
#> [1] "The number of enriched pathways containing keyword = 7"
#> [1] "p-value = 0.238544509476538"
validated_ind[2]
#> [1] "The number of enriched pathways = 906"
#> [1] "The number of enriched pathways containing keyword = 7"
#> [1] "p-value = 0.235897358467999"
validated_ind[3]
#> [1] "The number of enriched pathways = 1531"
#> [1] "The number of enriched pathways containing keyword = 11"
#> [1] "p-value = 0.216813728839481"
Priors from PLIER package
library(PLIER)
data(bloodCellMarkersIRISDMAP)
data(svmMarkers)
allPaths <- combinePaths(bloodCellMarkersIRISDMAP, svmMarkers)
source('~/data2/GenomicSuperSignature/R/gmtToMatrix.R')
term2gene <- matrixToTERM2GENE(allPaths)
#> [1] "CD4" "monocyte" "neutrophil"
#> [1] "The number of unique pathways in c(bloodCellMarkersIRISDMAP, svmMarkers) = 83"
#> [1] "The number of pathways containing the keywords = 14"
validated_ind[1]
#> [1] "The number of enriched pathways = 50"
#> [1] "The number of enriched pathways containing keyword = 8"
#> [1] "p-value = 0.714672311253075"
validated_ind[2]
#> [1] "The number of enriched pathways = 51"
#> [1] "The number of enriched pathways containing keyword = 8"
#> [1] "p-value = 0.748896136576048"
validated_ind[3]
#> [1] "The number of enriched pathways = 49"
#> [1] "The number of enriched pathways containing keyword = 10"
#> [1] "p-value = 0.233222747862607"