Contents

library(SummarizedExperiment)
library(dplyr)
library(GenomicSignatures)
library(circlize)
library(grid)
library(tm)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)
library(magrittr)

1 Load PCAmodels

# Directory that holds the saved PCA model files, one sub-folder per
# gene-set collection.
model_dir <- "~/data2/PCAGenomicSignatures_Library"

# Read the three models used in this report: canonical pathways (c2cp),
# oncogenic signatures (c6) and immunological signatures (c7).
c2cp <- readRDS(file.path(model_dir, "canonicalPathways/recount_canonicalPathways_PCAmodel.rds"))
c6 <- readRDS(file.path(model_dir, "oncogenicSignatures/recount_oncogenicSignatures_PCAmodel.rds"))
c7 <- readRDS(file.path(model_dir, "immunologicalSignatures/recount_immunologicalSignatures_PCAmodel.rds"))

# Validation datasets: load() brings `TCGA_validationDatasets` into the
# workspace; it is then aliased as `dataset`.
load("~/data2/GenomicSuperSignature/data/TCGA_validationDatasets.rda")
dataset <- TCGA_validationDatasets

# Color setup: interpolate n colors from white to red and keep only the
# first, the second and the last of them.
colfunc <- colorRampPalette(c("white", "red"))
n <- 20
col <- colfunc(n)[c(1, 2, n)]

2 Canonical Pathways (c2cp)

2.1 Validation

2.2 MeSH

# Word clouds for PCclusters 49 and 109 of the canonical-pathways model;
# both calls use the same settings (rm.noise = 4, weighted = TRUE).
drawWordcloud(c2cp, ind = 49, rm.noise = 4, weighted = TRUE)

drawWordcloud(c2cp, ind = 109, rm.noise = 4, weighted = TRUE)

2.3 GSEA

# Pathways reported for PCclusters 49 and 109 of the canonical-pathways model;
# the printed DataFrame has one column per PCcluster and rows labelled
# Up_* / Down_*.
subsetPathways(c2cp, c(49, 109))
## DataFrame with 20 rows and 2 columns
##                                                                   PCcluster49
##                                                                      <factor>
## Up_1                                                    BIOCARTA_TRKA_PATHWAY
## Up_2                                  REACTOME_SCAVENGING_OF_HEME_FROM_PLASMA
## Up_3                                                   BIOCARTA_GATA3_PATHWAY
## Up_4                                                    BIOCARTA_MTOR_PATHWAY
## Up_5                                                    BIOCARTA_PYK2_PATHWAY
## ...                                                                       ...
## Down_6                          REACTOME_ORGANELLE_BIOGENESIS_AND_MAINTENANCE
## Down_7                                              REACTOME_SIGNALING_BY_WNT
## Down_8                                          REACTOME_MITOTIC_PROMETAPHASE
## Down_9  REACTOME_THE_CITRIC_ACID_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT
## Down_10                                            KEGG_WNT_SIGNALING_PATHWAY
##                                                                              PCcluster109
##                                                                                  <factor>
## Up_1                                                                        KEGG_RIBOSOME
## Up_2    REACTOME_NONSENSE_MEDIATED_DECAY_NMD_INDEPENDENT_OF_THE_EXON_JUNCTION_COMPLEX_EJC
## Up_3                                                 REACTOME_SELENOAMINO_ACID_METABOLISM
## Up_4                                                 REACTOME_NONSENSE_MEDIATED_DECAY_NMD
## Up_5                 REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
## ...                                                                                   ...
## Down_6                                                           REACTOME_CILIUM_ASSEMBLY
## Down_7                                              REACTOME_C_TYPE_LECTIN_RECEPTORS_CLRS
## Down_8                                                      REACTOME_MITOTIC_PROMETAPHASE
## Down_9                                                    REACTOME_ESR_MEDIATED_SIGNALING
## Down_10                                                    REACTOME_SIGNALING_BY_HEDGEHOG

2.4 Smallest/Largest PCclusters

# Work on the canonical-pathways model in this section.
x <- c2cp

# Positions of the smallest and the largest entry in the model's `size`
# metadata.
indMin <- which.min(metadata(x)$size)
indMax <- which.max(metadata(x)$size)

# Print the two extreme sizes.
metadata(x)$size[indMin]
## [1] 2
metadata(x)$size[indMax]
## [1] 127

# Word cloud for the smallest PCcluster, then for the largest one.
drawWordcloud(x, indMin)

drawWordcloud(x, indMax)

3 Oncogenic Signatures (c6)

3.1 Validation

3.2 MeSH

# Word clouds for PCclusters 68 and 136 of the oncogenic-signatures model;
# both calls use the same settings (rm.noise = 4, weighted = TRUE).
drawWordcloud(c6, ind = 68, rm.noise = 4, weighted = TRUE)

drawWordcloud(c6, ind = 136, rm.noise = 4, weighted = TRUE)

3.3 GSEA

# Signatures reported for PCclusters 68 and 136 of the oncogenic-signatures
# model; the printed DataFrame has one column per PCcluster and rows labelled
# Up_* / Down_*.
subsetPathways(c6, c(68, 136)) 
## DataFrame with 20 rows and 2 columns
##                PCcluster68           PCcluster136
##                   <factor>               <factor>
## Up_1    BCAT_BILD_ET_AL_DN           PGF_UP.V1_UP
## Up_2            TBK1.DF_DN         ERBB2_UP.V1_DN
## Up_3              EIF4E_DN             TBK1.DF_DN
## Up_4          PGF_UP.V1_UP        VEGF_A_UP.V1_DN
## Up_5        ERBB2_UP.V1_DN       RB_P130_DN.V1_DN
## ...                    ...                    ...
## Down_6       E2F1_UP.V1_UP  ESC_J1_UP_EARLY.V1_DN
## Down_7            STK33_UP           MEK_UP.V1_DN
## Down_8          TBK1.DF_UP      PRC2_EED_UP.V1_DN
## Down_9       EGFR_UP.V1_DN         HOXA9_DN.V1_UP
## Down_10       MEK_UP.V1_DN GCNP_SHH_UP_LATE.V1_UP

3.4 Smallest/Largest PCclusters

# Work on the oncogenic-signatures model in this section.
x <- c6

# Positions of the smallest and the largest entry in the model's `size`
# metadata.
indMin <- which.min(metadata(x)$size)
indMax <- which.max(metadata(x)$size)

# Print the two extreme sizes.
metadata(x)$size[indMin]
## [1] 3
metadata(x)$size[indMax]
## [1] 119

# Word cloud for the smallest PCcluster, then for the largest one.
drawWordcloud(x, indMin)

drawWordcloud(x, indMax)

4 Immunological Signatures (c7)

4.1 Validation

4.2 MeSH

# Word clouds for PCclusters 75 and 76 of the immunological-signatures model;
# both calls use the same settings (rm.noise = 4, weighted = TRUE).
drawWordcloud(c7, ind = 75, rm.noise = 4, weighted = TRUE)

drawWordcloud(c7, ind = 76, rm.noise = 4, weighted = TRUE)

4.3 GSEA

# Signatures reported for PCclusters 75 and 76 of the immunological-signatures
# model; the printed DataFrame has one column per PCcluster and rows labelled
# Up_* / Down_*.
subsetPathways(c7, c(75, 76))
## DataFrame with 20 rows and 2 columns
##                                                                                    PCcluster75
##                                                                                       <factor>
## Up_1                                    GSE2405_0H_VS_24H_A_PHAGOCYTOPHILUM_STIM_NEUTROPHIL_UP
## Up_2                  GSE27241_WT_VS_RORGT_KO_TH17_POLARIZED_CD4_TCELL_TREATED_WITH_DIGOXIN_UP
## Up_3                                     GSE2405_0H_VS_9H_A_PHAGOCYTOPHILUM_STIM_NEUTROPHIL_DN
## Up_4                               GSE26030_TH1_VS_TH17_RESTIMULATED_DAY5_POST_POLARIZATION_UP
## Up_5                                                     GSE14000_TRANSLATED_RNA_VS_MRNA_DC_DN
## ...                                                                                        ...
## Down_6                                           GSE13484_12H_VS_3H_YF17D_VACCINE_STIM_PBMC_UP
## Down_7                                                    GSE17721_PAM3CSK4_VS_CPG_24H_BMDC_DN
## Down_8                                                     GSE45365_WT_VS_IFNAR_KO_CD11B_DC_UP
## Down_9  GSE19888_ADENOSINE_A3R_INH_PRETREAT_AND_ACT_BY_A3R_VS_TCELL_MEMBRANES_ACT_MAST_CELL_UP
## Down_10                                                GSE17721_12H_VS_24H_GARDIQUIMOD_BMDC_UP
##                                                     PCcluster76
##                                                        <factor>
## Up_1            GSE9006_TYPE_1_VS_TYPE_2_DIABETES_PBMC_AT_DX_UP
## Up_2          GSE7596_AKT_TRANSD_VS_CTRL_CD4_TCONV_WITH_TGFB_UP
## Up_3            GSE5099_UNSTIM_VS_MCSF_TREATED_MONOCYTE_DAY7_UP
## Up_4                GSE6269_HEALTHY_VS_STAPH_AUREUS_INF_PBMC_UP
## Up_5         GSE37416_CTRL_VS_6H_F_TULARENSIS_LVS_NEUTROPHIL_DN
## ...                                                         ...
## Down_6                         GSE27786_CD8_TCELL_VS_NKTCELL_DN
## Down_7                       GSE3982_MAST_CELL_VS_NEUTROPHIL_UP
## Down_8            GSE13484_12H_VS_3H_YF17D_VACCINE_STIM_PBMC_DN
## Down_9                   GSE2770_IL4_ACT_VS_ACT_CD4_TCELL_2H_DN
## Down_10 GSE411_UNSTIM_VS_100MIN_IL6_STIM_SOCS3_KO_MACROPHAGE_DN

4.4 Smallest/Largest PCclusters

# Work on the immunological-signatures model in this section.
x <- c7

# Positions of the smallest and the largest entry in the model's `size`
# metadata.
indMin <- which.min(metadata(x)$size)
indMax <- which.max(metadata(x)$size)

# Print the two extreme sizes.
metadata(x)$size[indMin]
## [1] 3
metadata(x)$size[indMax]
## [1] 229

# Word cloud for the smallest PCcluster, then for the largest one.
drawWordcloud(x, indMin)

drawWordcloud(x, indMax)