suppressPackageStartupMessages({
library(SummarizedExperiment)
library(ExperimentHub)
library(tissueTreg)
library(GenomicSuperSignature)
})
RAVmodel <- readRDS("~/GSS/GenomicSuperSignaturePaper/inst/extdata/RAVmodel_C2.rds")
RAVmodel_mmu <- readRDS("/mnt/STORE1/16tb_b/refinebio_mice/mouse_colon/refinebioRseq_mmu_colon_RAVmodel_MSigDB_mmu_C2_20220810.rds")
Use the data from tissueTreg package, RNA-seq data from tissue T regulatory cells from mice.
The package provides ready to use epigenomes (obtained from TWGBS) and transcriptomes (RNA-seq) from various tissues as obtained in the study (Delacher and Imbusch 2017, PMID: 28783152). Regulatory T cells (Treg cells) perform two distinct functions: they maintain self-tolerance, and they support organ homeostasis by differentiating into specialized tissue Treg cells. The underlying dataset characterises the epigenetic and transcriptomic modifications for specialized tissue Treg cells.
eh <- ExperimentHub()
se_rpkms <- eh[["EH1074"]]
Gene symbols for this dataset:
head(rowData(se_rpkms))
#> DataFrame with 6 rows and 1 column
#> id_symbol
#> <character>
#> ENSMUSG00000030105 Arl8b
#> ENSMUSG00000042428 Mgat3
#> ENSMUSG00000096054 Syne1
#> ENSMUSG00000046532 Ar
#> ENSMUSG00000020495 Smg8
#> ENSMUSG00000058979 Cecr5
Remove low-expressing genes
## Use the same default filter as `GSEABenchmarkeR::loadEData`
## Exclude genes with cpm < 2 in more than half of the samples
ind <- which(rowSums(assay(se_rpkms)<2) >=8) # 12,126 low-expressing genes
filtered <- se_rpkms[-ind,] # 8,235 gene x 15 samples
Log transformation to make the data follow normal distribution
dat <- log2(assay(filtered) + 1)
hist(dat)
Change EnsemblID to gene symbols
rownames(dat) <- rowData(filtered)$id_symbol
dat[1:6, 1:6]
#> Fat-Treg-R1 Fat-Treg-R2 Fat-Treg-R3 Liver-Treg-R1 Liver-Treg-R2
#> Arl8b 4.906891 3.584963 4.321928 2.807355 3.584963
#> Smg8 1.584963 0.000000 1.584963 1.000000 1.584963
#> Cecr5 1.584963 2.321928 2.321928 3.000000 3.906891
#> Nol8 2.584963 2.807355 3.459432 3.807355 3.584963
#> Smox 2.321928 2.000000 2.321928 2.000000 2.321928
#> Nop9 2.807355 1.000000 1.584963 1.584963 1.000000
#> Liver-Treg-R3
#> Arl8b 4.087463
#> Smg8 1.584963
#> Cecr5 3.000000
#> Nol8 3.000000
#> Smox 2.321928
#> Nop9 1.584963
Check orthologs between human and mice:
se_rpkms) have human orthologs.filtered) have human orthologs.library(Orthology.eg.db)
## Get the list of Gene IDs for mice
mKeys <- keys(Orthology.eg.db, "Mus.musculus")
## Map all the available mouse genes to human genes
mice2human <- select(Orthology.eg.db, mKeys, "Homo.sapiens", "Mus.musculus")
library(org.Hs.eg.db)
library(org.Mm.eg.db)
Mm_obj <- as.character(mice2human$Mus.musculus) %>% as.list
mice2human$Mm_GS <- EnrichmentBrowser::idMap(obj = Mm_obj, org = "mmu",
from = "ENTREZID", to = "SYMBOL")
Hs_obj <- as.character(mice2human$Homo.sapiens) %>% as.list
mice2human$Hs_GS <- EnrichmentBrowser::idMap(obj = Hs_obj, org = "hsa",
from = "ENTREZID", to = "SYMBOL")
# saveRDS(mice2human, "miceHumanOrthologs.rds")
## Convert mouse genes to human gene ortholog
Mm_genes <- rowData(filtered)$id_symbol # 8,235 genes in filtered matrix
## Mouse genes that were used in the input and have human orthologs
cg_ind <- which(Mm_genes %in% mice2human$Mm_GS & !isEmpty(mice2human$Hs_GS))
cg <- Mm_genes[cg_ind] # 7,184 mice genes have human ortholog info
## Make a named list, `orth_map`
orth_map <- as.list(mice2human$Hs_GS)
sum(sapply(orth_map, isEmpty)) # 45 empty entries
#> [1] 45
names(orth_map) <- mice2human$Mm_GS # 16,730 mice-human orthologs
## Subset the input data to the genes having human orthologs
dat_sub <- filtered[cg_ind,]
rowData(dat_sub)$Hs_ortholog <- orth_map[cg]
assay_sub <- assay(dat_sub)
rownames(assay_sub) <- rowData(dat_sub)$Hs_ortholog
val_all <- validate(log2(assay_sub + 1), RAVmodel)
#> Warning in irlba::irlba(as.matrix(t(dat[gene_common, ])), nv = 8, center =
#> TRUE, : You're computing too large a percentage of total singular values, use a
#> standard svd instead.
heatmapTable(val_all, RAVmodel, num.out = 5, swCutoff = 0)
ind <- 2244
drawWordcloud(RAVmodel, ind)
subsetEnrichedPathways(RAVmodel, ind, n = 5, both = TRUE) %>% as.data.frame()
#> RAV2244.Description
#> Up_1 FEVR_CTNNB1_TARGETS_UP
#> Up_2 WONG_ADULT_TISSUE_STEM_MODULE
#> Up_3 KRIGE_RESPONSE_TO_TOSEDOSTAT_24HR_UP
#> Up_4 REACTOME_ADAPTIVE_IMMUNE_SYSTEM
#> Up_5 BOQUEST_STEM_CELL_CULTURED_VS_FRESH_UP
#> Down_1 ALTEMEIER_RESPONSE_TO_LPS_WITH_MECHANICAL_VENTILATION
#> Down_2 MCLACHLAN_DENTAL_CARIES_UP
#> Down_3 LI_INDUCED_T_TO_NATURAL_KILLER_UP
#> Down_4 SMIRNOV_CIRCULATING_ENDOTHELIOCYTES_IN_CANCER_UP
#> Down_5 VERHAAK_GLIOBLASTOMA_MESENCHYMAL
findStudiesInCluster(RAVmodel, ind, studyTitle = TRUE)
#> studyName PC Variance explained (%)
#> 1 SRP072980 3 4.43
#> 2 SRP076719 2 6.81
#> 3 SRP119636 6 1.59
#> title
#> 1 Stochastic Principles Governing Alternative Splicing of RNA
#> 2 Multiple Origins of Virus Persistence during Natural Control of HIV Infection
#> 3 Integration of genome-wide analysis to characterize long noncoding RNAs in diverse immune cell types of the stage IV melanoma patients
val_ind <- validatedSignatures(val_all, RAVmodel, indexOnly = TRUE)
for (i in val_ind) {
print(paste("Enriched MeSH terms for RAV", i))
drawWordcloud(RAVmodel, i)
}
#> [1] "Enriched MeSH terms for RAV 2244"
#> [1] "Enriched MeSH terms for RAV 522"
#> [1] "Enriched MeSH terms for RAV 4260"
#> [1] "Enriched MeSH terms for RAV 203"
#> [1] "Enriched MeSH terms for RAV 1455"
for (i in val_ind) {
print(paste("Associated studies for RAV", i))
res <- findStudiesInCluster(RAVmodel, i, studyTitle = TRUE)
print(res)
}
#> [1] "Associated studies for RAV 2244"
#> studyName PC Variance explained (%)
#> 1 SRP072980 3 4.43
#> 2 SRP076719 2 6.81
#> 3 SRP119636 6 1.59
#> title
#> 1 Stochastic Principles Governing Alternative Splicing of RNA
#> 2 Multiple Origins of Virus Persistence during Natural Control of HIV Infection
#> 3 Integration of genome-wide analysis to characterize long noncoding RNAs in diverse immune cell types of the stage IV melanoma patients
#> [1] "Associated studies for RAV 522"
#> studyName PC Variance explained (%)
#> 1 ERP104864 11 0.86
#> 2 SRP075396 3 3.79
#> 3 SRP097735 3 3.71
#> 4 SRP103099 16 0.99
#> title
#> 1 Comprehensive transcriptome analysis of synovium and peripheral blood reveals major disease heterogeneity early in the RA disease process prior to therapeutic intervention
#> 2 Xenograft sequencing to elucidate molecular events following anti-CD44 treatment
#> 3 Neuroblastoma cells undergo transcriptomic alterations during dissemination into the bone marrow and subsequent tumor progression
#> 4 Patient derived xenograft models of leukemia and lymphoma whole transcriptome sequencing
#> [1] "Associated studies for RAV 4260"
#> studyName PC Variance explained (%)
#> 1 SRP149978 2 3.28
#> 2 SRP155483 3 3.18
#> 3 SRP158994 3 3.75
#> title
#> 1 RNA-seq of HIV bnAb and control individuals PBMC
#> 2 White blood cells from rheumatoid arthritis patients and matched healthy donors
#> 3 Longitudinal transcriptomic characterization of the immune response to acute hepatitis C virus infection
#> [1] "Associated studies for RAV 203"
#> studyName PC Variance explained (%)
#> 1 ERP013260 4 5.09
#> 2 SRP049605 2 3.63
#> 3 SRP055390 3 4.83
#> 4 SRP066197 2 2.74
#> 5 SRP095405 4 4.23
#> title
#> 1 Transcriptional landscape of human tissue lymphocytes unveils\nuniqueness of tumor-infiltrating T regulatory cells
#> 2 Identification of a Molecular Signature for Acute Lyme Disease by Human Transcriptome Profiling
#> 3 Transcriptome analysis in chronic lymphocytic leukemia cells using RNA sequencing (RNA-seq)
#> 4 Transcriptional profiling of TH2 cells identifies pathogenic features associated with asthma
#> 5 Identification of genes induced by NOTCH1 in a chronic lymphocytic leukaemia (CLL) cell line and tracking of these genes in primary CLL patients
#> [1] "Associated studies for RAV 1455"
#> studyName PC Variance explained (%)
#> 1 SRP050272 3 3.34
#> 2 SRP056197 1 21.16
#> 3 SRP056197 2 10.33
#> 4 SRP056295 2 7.50
#> 5 SRP062144 1 39.70
#> title
#> 1 Expression and Prognostic impact of LncRNAs in Acute Myeloid Leukemia
#> 2 Leucegene: AML sequencing (part 4)
#> 3 Leucegene: AML sequencing (part 4)
#> 4 Leucegene: AML sequencing (part 5)
#> 5 Integrated analysis of MLL-AF9 AML patients and model leukemias highlights RET and other novel therapeutic targets (RNA-seq AML development)
org <- c("hsa", # Homo sapiens (human)
"mmu", # Mus musculus (mouse)
"cel", # Caenorhabditis elegans (nematode)
"dme", # Drosophila melanogaster (fruit fly)
"dre", # Danio rerio (zebrafish)
"sce") # Saccharomyces cereviseae (budding yeast)