suppressPackageStartupMessages({
library(PCAGenomicSignatures)
library(dplyr)
library(ggplot2)
})
TCGA_validationDatasets is a list containing 6 TCGA datasets: COAD, BRCA, LUAD,
READ, UCEC, and OV. The first 5 are raw read counts from the GSEABenchmarkeR package
with log2(count + 1) transformation. Also, GSEABenchmarkeR::loadEData excluded genes
with cpm < 2 in more than half of the samples. The TCGA-OV dataset is from the
curatedOvarianData package. This validation dataset file was built using the
R/build_TCGA_validationDatasets.R script.
# Load the saved validation data; the file is expected to provide the
# `TCGA_validationDatasets` list used below.
load("data/TCGA_validationDatasets.rda")
names(TCGA_validationDatasets)
## [1] "COAD" "BRCA" "LUAD" "READ" "UCEC" "OV"
# OV (the 6th element) is not used in this vignette: keep the first five only.
datasets <- TCGA_validationDatasets[seq_len(5)]
This PCAmodel is built using the top 20 PCs of 536 studies. The number of clusters was
decided by d = 2.25, and MSigDB C2 gene sets are used for GSEA-based annotation.
# Directory with the extdata files of the PCAGenomicSignaturesPaper package.
data.dir <- system.file("extdata", package = "PCAGenomicSignaturesPaper")
# Read the serialized model (MSigDB C2 annotated) and print its summary.
PCAmodel <- file.path(data.dir, "PCAmodel_C2.rds") %>%
  readRDS()
PCAmodel
## class: PCAGenomicSignatures
## dim: 13934 4764
## metadata(6): cluster size ... MeSH_freq updateNote
## assays(1): model
## rownames(13934): CASKIN1 DDX3Y ... CTC-457E21.9 AC007966.1
## rowData names(0):
## colnames(4764): RAV1 RAV2 ... RAV4763 RAV4764
## colData names(4): RAV studies silhouetteWidth gsea
## trainingData(2): PCAsummary MeSH
## trainingData names(536): DRP000987 SRP059172 ... SRP164913 SRP188526
# Print the free-text note stored with the model describing how it was built.
updateNote(PCAmodel)
## [1] "536 refine.bio studies/ top 90% varying genes/ GSEA with MSigDB C2"
Here, we plot the heatmapTable of validation results from multiple studies. By matching these results with metadata, you can identify RAVs that are more strongly associated with specific features.
# Validate all five datasets against the model at once and show the result as
# a heatmap table. This step takes a while because of the size of the datasets.
val_all <- validate(datasets, PCAmodel)
heatmapTable(val_all)
It seems like RAV221 and RAV868 are specific to BRCA while RAV832 is strongly associated with colon/rectal cancers.
If you provide the validation result from one dataset, heatmapTable includes the average
silhouette width as a reference.
# Single-dataset validation for colon adenocarcinoma (COAD).
val_coad <- validate(datasets[["COAD"]], PCAmodel)
heatmapTable(val_coad)
# Single-dataset validation for rectum adenocarcinoma (READ).
val_read <- validate(datasets[["READ"]], PCAmodel)
heatmapTable(val_read)
RAV221 shows the highest validation score with a positive average silhouette width.
# Single-dataset validation for breast cancer (BRCA).
val_brca <- validate(datasets[["BRCA"]], PCAmodel)
heatmapTable(val_brca)
RAV868 didn’t score in the top 5, so we checked a couple more top validated RAVs. RAV868 is ranked 6th based on the score, with a negative average silhouette width.
# Request 7 RAVs in the table so that RAV868 (ranked 6th) is included.
heatmapTable(val_brca, num.out = 7)
RAV221 and RAV868 are specific to BRCA.
RAV221 consists of three breast cancer studies (ERP016798, SRP023262, and SRP111343), and the top 10 enriched pathways are associated with breast cancer.
# ---- RAV221 ----
# Index of the RAV inspected in this section.
ind <- 221
# Studies in this RAV's cluster, together with their titles.
findStudiesInCluster(PCAmodel, ind, studyTitle = TRUE)
## studyName
## 279 ERP016798
## 977 SRP023262
## 4275 SRP111343
## title
## 279 Whole transcriptome profiling of 63 breast cancer tumours
## 977 A shared transcriptional program in early breast neoplasias despite genetic and clinical distinctions
## 4275 RNAseq analysis of chemotherapy and radiation therapy-naïve breast tumors
# Enriched pathways annotated for this RAV, shown as a data frame.
subsetEnrichedPathways(PCAmodel, ind) %>%
  as.data.frame()
## RAV221
## Up_1 SMID_BREAST_CANCER_BASAL_DN
## Up_2 DOANE_BREAST_CANCER_ESR1_UP
## Up_3 SMID_BREAST_CANCER_LUMINAL_B_UP
## Up_4 VANTVEER_BREAST_CANCER_ESR1_UP
## Up_5 LIEN_BREAST_CARCINOMA_METAPLASTIC_VS_DUCTAL_DN
## Up_6 CHARAFE_BREAST_CANCER_LUMINAL_VS_BASAL_UP
## Up_7 CHARAFE_BREAST_CANCER_LUMINAL_VS_MESENCHYMAL_UP
## Up_8 SMID_BREAST_CANCER_RELAPSE_IN_BONE_UP
## Up_9 SMID_BREAST_CANCER_RELAPSE_IN_BRAIN_DN
## Up_10 POOLA_INVASIVE_BREAST_CANCER_DN
# Word cloud for this RAV.
drawWordcloud(PCAmodel, ind)
# ---- RAV868 ----
# Index of the RAV inspected in this section.
ind <- 868
# Studies in this RAV's cluster, together with their titles.
findStudiesInCluster(PCAmodel, ind, studyTitle = TRUE)
## studyName
## 773 SRP014428
## 4275 SRP111343
## 5720 SRP158730
## 5843 SRP163173
## 5960 SRP169094
## title
## 773 Full-length mRNA-Seq from single-cell levels of RNA and individual circulating tumor cells
## 4275 RNAseq analysis of chemotherapy and radiation therapy-naïve breast tumors
## 5720 Separation of breast cancer and organ microenvironment transcriptomes in metastases
## 5843 Integrative epigenetic taxonomy of primary prostate cancer [RNA-Seq]
## 5960 On-Treatment Biomarkers Improve Prediction of Response to Neoadjuvant Chemotherapy in Breast Cancer
# Enriched pathways annotated for this RAV, shown as a data frame.
subsetEnrichedPathways(PCAmodel, ind) %>%
  as.data.frame()
## RAV868
## Up_1 CHARAFE_BREAST_CANCER_LUMINAL_VS_MESENCHYMAL_UP
## Up_2 LIEN_BREAST_CARCINOMA_METAPLASTIC_VS_DUCTAL_DN
## Up_3 DOANE_BREAST_CANCER_ESR1_UP
## Up_4 CHARAFE_BREAST_CANCER_LUMINAL_VS_BASAL_UP
## Up_5 ROSTY_CERVICAL_CANCER_PROLIFERATION_CLUSTER
## Up_6 LIM_MAMMARY_STEM_CELL_DN
## Up_7 COLDREN_GEFITINIB_RESISTANCE_DN
## Up_8 DOANE_BREAST_CANCER_CLASSES_UP
## Up_9 SOTIRIOU_BREAST_CANCER_GRADE_1_VS_3_UP
## Up_10 VANTVEER_BREAST_CANCER_ESR1_UP
# Word cloud for this RAV.
drawWordcloud(PCAmodel, ind)
# ---- RAV832 ----
# Index of the RAV inspected in this section.
ind <- 832
# Studies in this RAV's cluster, together with their titles.
findStudiesInCluster(PCAmodel, ind, studyTitle = TRUE)
## studyName
## 681 SRP010181
## 1074 SRP029880
## 2543 SRP068591
## 2792 SRP073267
## 4655 SRP123604
## title
## 681 Derivation of HLA types from shotgun sequence datasets
## 1074 Gene expression profiling study by RNA-seq in colorectal cancer
## 2543 Gene signature in sessile serrated polyps identifies colon cancer subtype
## 2792 Impact of RNA degradation on fusion detection by RNA-seq
## 4655 Immune Profiling of Premalignant Lesions in Patients with Lynch Syndrome
# Enriched pathways annotated for this RAV, shown as a data frame.
subsetEnrichedPathways(PCAmodel, ind) %>%
  as.data.frame()
## RAV832
## Up_1 SABATES_COLORECTAL_ADENOMA_UP
## Up_2 HSIAO_LIVER_SPECIFIC_GENES
## Up_3 GRADE_COLON_AND_RECTAL_CANCER_UP
## Up_4 REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION
## Up_5 REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
## Up_6 KEGG_RIBOSOME
## Up_7 LEE_LIVER_CANCER_ACOX1_DN
## Up_8 REACTOME_SELENOAMINO_ACID_METABOLISM
## Up_9 KOBAYASHI_EGFR_SIGNALING_24HR_DN
## Up_10 REACTOME_REGULATION_OF_EXPRESSION_OF_SLITS_AND_ROBOS
# Word cloud for this RAV.
drawWordcloud(PCAmodel, ind)
Here, we added an SLE-WB microarray dataset and 4 colon cancer microarray datasets
to the 5 TCGA datasets, and scoreCutoff is set to 0.68 instead of the default 0.7.
# NOTE(review): `new_datasets` is not defined anywhere in the visible code —
# presumably it is built in a chunk that is not echoed; confirm before running.
names(new_datasets)
## [1] "COAD" "BRCA" "LUAD" "READ" "UCEC" "SLE"
## [7] "GSE14095" "GSE17536" "GSE2109" "GSE39582"
Based on this multi-datasets validation table,
- RAV23 and RAV1551 are SLE-specific
- RAV188 seems to be COAD-specific, different from RAV832 which is associated with both COAD and READ
# ---- RAV23 ----
# Index of the RAV inspected in this section.
ind <- 23
# Studies in this RAV's cluster, together with their titles.
findStudiesInCluster(PCAmodel, ind, studyTitle = TRUE)
## studyName
## 16 DRP001953
## 524 ERP114104
## 1749 SRP051848
## 2024 SRP059039
## 3329 SRP089814
## 4244 SRP110609
## 4533 SRP118733
## 4977 SRP132018
## 5135 SRP136057
## 5140 SRP136108
## 5452 SRP150419
## 5462 SRP150595
## title
## 16 Interactive Transcriptome Analysis of Malaria Patients and Infecting Plasmodium falciparum in Indonesia
## 524 Altered Gene Expression in Antipsychotic Induced Weight Gain
## 1749 Gene Networks Specific for Innate Immunity Define Post-traumatic Stress Disorder [RNA-Seq]
## 2024 Elucidating the etiology and molecular pathogenicity of infectious diarrhea by high throughput RNA sequencing
## 3329 Differentially Expressed Gene Transcripts Using RNA Sequencing from the Blood of Immunosuppressed Kidney Allograft Recipients
## 4244 RNA-sequencing analysis of response to P.falciparum infection in Fulani and Mossi ethnic groups, Burkina Faso
## 4533 Transcriptomic analysis of Multiple Myeloma bone marrow microenvironment
## 4977 In-vitro stimulation of healthy donor blood with IL-3 cytokine
## 5135 Whole Blood Transcriptome Profiling in Juvenile Idiopathic Arthritis and Inflammatory Bowel Disease
## 5140 RNA-seq of nine primary human cell types exposed in vitro to methylprednisolone
## 5452 Haemopedia: Human Haematopoietic Gene Expression
## 5462 Homo sapiens Transcriptome or Gene expression
# Enriched pathways annotated for this RAV, shown as a data frame.
subsetEnrichedPathways(PCAmodel, ind) %>%
  as.data.frame()
## RAV23
## Up_1 KEGG_RIBOSOME
## Up_2 REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
## Up_3 REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION
## Up_4 REACTOME_SELENOAMINO_ACID_METABOLISM
## Up_5 REACTOME_RRNA_PROCESSING
## Up_6 REACTOME_TRANSLATION
## Up_7 MANALO_HYPOXIA_DN
## Up_8 CAIRO_HEPATOBLASTOMA_CLASSES_UP
## Up_9 PUJANA_BRCA2_PCC_NETWORK
## Up_10 WONG_EMBRYONIC_STEM_CELL_CORE
# Word cloud for this RAV.
drawWordcloud(PCAmodel, ind)
# ---- RAV1551 ----
# Index of the RAV inspected in this section.
ind <- 1551
# Studies in this RAV's cluster, together with their titles.
findStudiesInCluster(PCAmodel, ind, studyTitle = TRUE)
## studyName
## 1740 SRP051688
## 1904 SRP056840
## 2209 SRP062966
## 2699 SRP071965
## 4031 SRP105369
## 4914 SRP131037
## 5462 SRP150595
## 5481 SRP150872
## title
## 1740 A Cell-based Systems Biology Assessment of Human Blood to Monitor Immune Responses After Influenza Vaccination
## 1904 Renal systems biology of patients with systemic inflammatory response syndrome
## 2209 SLE lupus RNA-seq
## 2699 A blood RNA signature for tuberculosis disease risk: a prospective cohort study
## 4031 Transcriptome analysis of G protein-coupled receptors in distinct genetic subgroups of acute myeloid leukemia: identification of potential disease-specific targets
## 4914 Using Next-Generation Sequencing Transcriptomics to Determine Markers of Post-traumatic Symptoms - preliminary findings from a post-deployment cohort
## 5462 Homo sapiens Transcriptome or Gene expression
## 5481 Discovering in vivo cytokine eQTL interactions from a lupus clinical trial
# Enriched pathways annotated for this RAV, shown as a data frame.
subsetEnrichedPathways(PCAmodel, ind) %>%
  as.data.frame()
## RAV1551
## Up_1 REACTOME_NEUTROPHIL_DEGRANULATION
## Up_2 THEILGAARD_NEUTROPHIL_AT_SKIN_WOUND_DN
## Up_3 ALTEMEIER_RESPONSE_TO_LPS_WITH_MECHANICAL_VENTILATION
## Up_4 VERHAAK_AML_WITH_NPM1_MUTATED_UP
## Up_5 VERHAAK_GLIOBLASTOMA_MESENCHYMAL
## Up_6 HAHTOLA_MYCOSIS_FUNGOIDES_CD4_UP
## Up_7 BROWN_MYELOID_CELL_DEVELOPMENT_UP
## Up_8 TAKEDA_TARGETS_OF_NUP98_HOXA9_FUSION_8D_DN
## Up_9 LENAOUR_DENDRITIC_CELL_MATURATION_DN
## Up_10 SMIRNOV_CIRCULATING_ENDOTHELIOCYTES_IN_CANCER_UP
# Word cloud for this RAV.
drawWordcloud(PCAmodel, ind)
# ---- RAV188 ----
# Index of the RAV inspected in this section.
ind <- 188
# Studies in this RAV's cluster, together with their titles.
findStudiesInCluster(PCAmodel, ind, studyTitle = TRUE)
## studyName
## 220 ERP013206
## 351 ERP023272
## 357 ERP023550
## 1074 SRP029880
## 3037 SRP077046
## 5411 SRP149847
## title
## 220 Whole transcriptome profiling of Esophageal adenocarcinoma and Barrett's
## 351 RNA-seq of formalin-fixed, paraffin-embedded uninvolved terminal ileal tissue obtained from ileo-colic resection surgeries of Crohn’s disease and control patients
## 357 RNA-seq of human intestinal organoids colonized with E. coli and other immature intestinal tissues
## 1074 Gene expression profiling study by RNA-seq in colorectal cancer
## 3037 A functional genomics predictive network model identifies regulators of inflammatory bowel disease: Mount Sinai Hospital (MSH) Population Specimen Collection and Profiling of Inflammatory Bowel Disease
## 5411 Differences in tissue immune cell populations following hematopoietic stem cell transplantation in Crohn's disease patients
# Enriched pathways for this RAV; with `both = TRUE` the table lists the
# Down_* pathways in addition to the Up_* ones (see the output below).
subsetEnrichedPathways(PCAmodel, ind, both = TRUE) %>%
  as.data.frame()
## RAV188
## Up_1 SCHUETZ_BREAST_CANCER_DUCTAL_INVASIVE_UP
## Up_2 VECCHI_GASTRIC_CANCER_ADVANCED_VS_EARLY_UP
## Up_3 LIM_MAMMARY_STEM_CELL_UP
## Up_4 ANASTASSIOU_MULTICANCER_INVASIVENESS_SIGNATURE
## Up_5 BOQUEST_STEM_CELL_UP
## Up_6 CHARAFE_BREAST_CANCER_LUMINAL_VS_MESENCHYMAL_DN
## Up_7 LINDGREN_BLADDER_CANCER_CLUSTER_2B
## Up_8 PICCALUGA_ANGIOIMMUNOBLASTIC_LYMPHOMA_UP
## Up_9 TURASHVILI_BREAST_LOBULAR_CARCINOMA_VS_DUCTAL_NORMAL_UP
## Up_10 SMID_BREAST_CANCER_NORMAL_LIKE_UP
## Down_1 WAMUNYOKOLI_OVARIAN_CANCER_LMP_UP
## Down_2 HOLLERN_EMT_BREAST_TUMOR_DN
## Down_3 CHARAFE_BREAST_CANCER_LUMINAL_VS_MESENCHYMAL_UP
## Down_4 LIM_MAMMARY_STEM_CELL_DN
## Down_5 VECCHI_GASTRIC_CANCER_ADVANCED_VS_EARLY_DN
## Down_6 COLDREN_GEFITINIB_RESISTANCE_DN
## Down_7 WOO_LIVER_CANCER_RECURRENCE_DN
## Down_8 REACTOME_RESPIRATORY_ELECTRON_TRANSPORT_ATP_SYNTHESIS_BY_CHEMIOSMOTIC_COUPLING_AND_HEAT_PRODUCTION_BY_UNCOUPLING_PROTEINS
## Down_9 REACTOME_RESPIRATORY_ELECTRON_TRANSPORT
## Down_10 WAMUNYOKOLI_OVARIAN_CANCER_GRADES_1_2_UP
# Word cloud for this RAV.
drawWordcloud(PCAmodel, ind)