.libPaths("/courses/BINF6430.202630/students/tran.nhi2/module06_QC_and_Quantification/renv/library/linux-ubuntu-noble/R-4.5/x86_64-pc-linux-gnu/")
cat("Library path set.\n")## Library path set.
suppressPackageStartupMessages({
library(tidyverse)
library(DESeq2)
library(DEGreport)
library(org.Hs.eg.db)
library(clusterProfiler)
library(pathview)
library(ggplot2)
library(pheatmap)
library(ggrepel)
library(cowplot)
library(enrichplot)
library(ggridges)
library(DOSE)
})
cat("\033[32mAll libraries loaded successfully.\033[0m\n")## [32mAll libraries loaded successfully.[0m
p07_path <- "/courses/BINF6430.202630/students/tran.nhi2/module06_QC_and_Quantification/project07.RData"
if (file.exists(p07_path)) {
load(p07_path)
cat("\033[32mProject 07 workspace loaded successfully.\033[0m\n")
} else {
stop(paste("File not found:", p07_path))
}## [32mProject 07 workspace loaded successfully.[0m
# Add threshold_OE and genelabels if not present
if (!"threshold_OE" %in% colnames(res_bmat_tb)) {
res_bmat_tb <- res_bmat_tb %>%
mutate(
threshold_OE = !is.na(padj) & padj < 0.1 & abs(log2FoldChange) >= 1,
genelabels = ifelse(threshold_OE, gene, "")
)
}
if (!"threshold_OE" %in% colnames(res_mm_tb)) {
res_mm_tb <- res_mm_tb %>%
mutate(
threshold_OE = !is.na(padj) & padj < 0.1 & abs(log2FoldChange) >= 1,
genelabels = ifelse(threshold_OE, gene, "")
)
}
cat("\033[32mthreshold_OE and genelabels columns verified.\033[0m\n")## [32mthreshold_OE and genelabels columns verified.[0m
We use alpha = 0.1 because this is an underpowered pilot
study. A relaxed p-value threshold allows us to include genes that lean
toward statistical significance without demanding the power of a
fully-replicated experiment.
## === BMAT Summary ===
##
## out of 23567 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 40, 0.17%
## LFC < 0 (down) : 10, 0.042%
## outliers [1] : 0, 0%
## low counts [2] : 5891, 25%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## === MM Summary ===
##
## out of 22474 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 29, 0.13%
## LFC < 0 (down) : 38, 0.17%
## outliers [1] : 0, 0%
## low counts [2] : 16337, 73%
## (mean count < 196)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
We filter by padj < 0.1 and
|log2FoldChange| >= 1. We map ENSEMBL IDs to gene
symbols (for readability) and Entrez IDs (required by KEGG and Disease
Ontology).
# ── BMAT ─────────────────────────────────────────────────────────────────────
DE_bmat <- res_bmat_tb %>%
filter(padj < 0.1 & abs(log2FoldChange) >= 1)
DE_bmat$symbol <- mapIds(org.Hs.eg.db, keys = DE_bmat$gene,
column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first")
DE_bmat$entrez <- mapIds(org.Hs.eg.db, keys = DE_bmat$gene,
column = "ENTREZID", keytype = "ENSEMBL", multiVals = "first")
write.csv(DE_bmat, "DE_BMAT.csv", row.names = TRUE)
cat(sprintf("BMAT: %d DE genes after filtering\n", nrow(DE_bmat)))## BMAT: 14 DE genes after filtering
| gene | baseMean | log2FoldChange | lfcSE | pvalue | padj | threshold_OE | symbol | genelabels | entrez |
|---|---|---|---|---|---|---|---|---|---|
| ENSG00000273768 | 81.53610 | -10.838511 | 3.0497175 | 0e+00 | 0.0000000 | TRUE | LOC124904613 | RNVU1-29 | 124904613 |
| ENSG00000177084 | 365.08065 | 1.685195 | 0.2540747 | 0e+00 | 0.0000000 | TRUE | POLE | POLE | 5426 |
| ENSG00000170522 | 1658.62441 | 1.138227 | 0.1770923 | 0e+00 | 0.0000000 | TRUE | ELOVL6 | ELOVL6 | 79071 |
| ENSG00000174697 | 267.24841 | 1.479131 | 0.2847458 | 0e+00 | 0.0000264 | TRUE | LEP | LEP | 3952 |
| ENSG00000260914 | 22.85321 | 8.456502 | 2.8325525 | 0e+00 | 0.0001048 | TRUE | NA | NA | |
| ENSG00000159307 | 250.30474 | 1.163067 | 0.2696740 | 7e-07 | 0.0014518 | TRUE | SCUBE1 | SCUBE1 | 80274 |
# ── MM ───────────────────────────────────────────────────────────────────────
DE_mm <- res_mm_tb %>%
filter(padj < 0.1 & abs(log2FoldChange) >= 1)
DE_mm$symbol <- mapIds(org.Hs.eg.db, keys = DE_mm$gene,
column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first")
DE_mm$entrez <- mapIds(org.Hs.eg.db, keys = DE_mm$gene,
column = "ENTREZID", keytype = "ENSEMBL", multiVals = "first")
write.csv(DE_mm, "DE_MM.csv", row.names = TRUE)
cat(sprintf("MM: %d DE genes after filtering\n", nrow(DE_mm)))## MM: 11 DE genes after filtering
| gene | baseMean | log2FoldChange | lfcSE | pvalue | padj | threshold_OE | symbol | genelabels | entrez |
|---|---|---|---|---|---|---|---|---|---|
| ENSG00000168209 | 1212.6055 | 1.962096 | 0.2663245 | 0e+00 | 0.0000000 | TRUE | DDIT4 | DDIT4 | 54541 |
| ENSG00000184557 | 220.5533 | 3.982983 | 0.6178745 | 0e+00 | 0.0000000 | TRUE | SOCS3 | SOCS3 | 9021 |
| ENSG00000157514 | 1170.0315 | 2.115485 | 0.3442793 | 0e+00 | 0.0000000 | TRUE | TSC22D3 | TSC22D3 | 1831 |
| ENSG00000173821 | 2025.1851 | 1.303084 | 0.2124927 | 0e+00 | 0.0000000 | TRUE | RNF213 | RNF213 | 57674 |
| ENSG00000138496 | 260.1226 | 2.973312 | 0.6607364 | 0e+00 | 0.0000230 | TRUE | PARP9 | PARP9 | 83666 |
| ENSG00000134107 | 437.9017 | 1.385086 | 0.3069167 | 2e-07 | 0.0001682 | TRUE | BHLHE40 | BHLHE40 | 8553 |
These heatmaps verify that samples cluster as expected. Co-cultured samples should group separately from alone samples. A clean 1:1 diagonal confirms each sample correlates perfectly with itself.
# ── BMAT ─────────────────────────────────────────────────────────────────────
vst_mat_bmat <- assay(vsd_bmat)
vst_cor_bmat <- cor(vst_mat_bmat)
bmat_annotation <- data.frame(
row.names = colnames(vst_mat_bmat),
co_cultured = s2c_bmat$co_cultured
)
pheatmap(vst_cor_bmat, annotation_col = bmat_annotation,
main = "BMAT Sample Correlation",
show_rownames = FALSE, show_colnames = FALSE)# ── MM ───────────────────────────────────────────────────────────────────────
vst_mat_mm <- assay(vsd_mm)
vst_cor_mm <- cor(vst_mat_mm)
mm_annotation <- data.frame(
row.names = colnames(vst_mat_mm),
co_cultured = s2c_mm$co_cultured
)
pheatmap(vst_cor_mm, annotation_col = mm_annotation,
main = "MM Sample Correlation",
show_rownames = FALSE, show_colnames = FALSE)degVolcano(
data.frame(res_bmat_tb[, c("log2FoldChange", "padj")]),
plot_text = data.frame(
res_bmat_tb[res_bmat_tb$threshold_OE == TRUE,
c("log2FoldChange", "padj", "genelabels")])
)degVolcano(
data.frame(res_mm_tb[, c("log2FoldChange", "padj")]),
plot_text = data.frame(
res_mm_tb[res_mm_tb$threshold_OE == TRUE,
c("log2FoldChange", "padj", "genelabels")])
)An MA plot shows log2 fold change (M) on the y-axis versus mean normalized expression (A) on the x-axis. Genes near M = 0 are similarly expressed between conditions. Genes above are upregulated in co-cultured cells; below are downregulated.
# Build DEGSets from the original split model dds objects
res_bmat_degset <- degComps(dds_bmat, contrast = list("co_cultured_TRUE_vs_FALSE"))
res_mm_degset <- degComps(dds_mm, contrast = list("co_cultured_TRUE_vs_FALSE"))
# Basic MA plots for BMAT and MM
degMA(res_bmat_degset, raw = TRUE, diff = 1)The x-axis (A) represents mean normalized expression and the y-axis (M) is the log2 fold change between co-cultured and alone conditions. Genes near M = 0 show little difference between conditions. Red/colored points are significantly DE (padj < 0.1, |LFC| ≥ 1), while gray points are not significant. The “fanning” pattern at low expression is expected — lowly expressed genes have higher variance, producing more scatter around zero.
# Side-by-side: standard MA (bottom) vs correlation MA (top)
ma1 <- degMA(res_mm_degset, correlation = TRUE, diff = 1)
ma2 <- degMA(res_mm_degset, diff = 1)
plot_grid(ma2, ma1, align = "v", nrow = 2, ncol = 1)These two-panel plots come from
degMAwithraw = TRUE(top panel) andcorrelation = TRUE(bottom panel).The top panel shows raw unshrunken fold changes vs mean expression. The dramatic vertical spread of blue points at low expression levels represents genes with extreme but unreliable fold change estimates caused by low counts — this illustrates exactly why shrinkage is necessary. Red points are significant genes.
The bottom panel plots the shrunken fold change (y-axis) against the unshrunken fold change (x-axis). The tight diagonal relationship for moderately and highly expressed genes shows that shrinkage changes these estimates only slightly. However, genes with very large unshrunken fold changes get pulled dramatically toward zero by apeglm — this is the shrinkage doing its job, removing noise from lowly expressed genes. The red significant genes cluster near the center-right diagonal, confirming that our DE calls come from genes where shrunken and unshrunken estimates agree reasonably well, indicating reliable results.
# ── BMAT: ENSEMBL ─────────────────────────────────────────────────────────────
original_gene_list_bmat <- DE_bmat$log2FoldChange
names(original_gene_list_bmat) <- DE_bmat$gene
gene_list_bmat <- sort(na.omit(original_gene_list_bmat), decreasing = TRUE)
# ── BMAT: Entrez ──────────────────────────────────────────────────────────────
original_gene_list_bmat_entrez <- DE_bmat$log2FoldChange
names(original_gene_list_bmat_entrez) <- DE_bmat$entrez
gene_list_bmat_entrez <- sort(na.omit(original_gene_list_bmat_entrez), decreasing = TRUE)
# ── MM: ENSEMBL ───────────────────────────────────────────────────────────────
original_gene_list_mm <- DE_mm$log2FoldChange
names(original_gene_list_mm) <- DE_mm$gene
gene_list_mm <- sort(na.omit(original_gene_list_mm), decreasing = TRUE)
# ── MM: Entrez ────────────────────────────────────────────────────────────────
original_gene_list_mm_entrez <- DE_mm$log2FoldChange
names(original_gene_list_mm_entrez) <- DE_mm$entrez
gene_list_mm_entrez <- sort(na.omit(original_gene_list_mm_entrez), decreasing = TRUE)
cat("BMAT gene list length:", length(gene_list_bmat), "\n")## BMAT gene list length: 14
## MM gene list length: 11
# ── BMAT ORA ──────────────────────────────────────────────────────────────────
gse_bmat_enrich <- tryCatch(
enrichGO(names(gene_list_bmat), ont = "BP", keyType = "ENSEMBL",
minGSSize = 3, maxGSSize = 800, pvalueCutoff = 0.1,
OrgDb = org.Hs.eg.db, pAdjustMethod = "none"),
error = function(e) { cat("BMAT enrichGO skipped:", e$message, "\n"); NULL }
)
# ── BMAT GSEA ─────────────────────────────────────────────────────────────────
gse_bmat <- tryCatch(
gseGO(geneList = gene_list_bmat, ont = "BP", keyType = "ENSEMBL",
minGSSize = 3, maxGSSize = 800, pvalueCutoff = 1,
verbose = TRUE, OrgDb = org.Hs.eg.db, pAdjustMethod = "none"),
error = function(e) { cat("BMAT gseGO skipped:", e$message, "\n"); NULL }
)
gse_bmat_pairwise <- if (!is.null(gse_bmat)) pairwise_termsim(gse_bmat) else NULL
# ── MM ORA ────────────────────────────────────────────────────────────────────
gse_mm_enrich <- tryCatch(
enrichGO(names(gene_list_mm), ont = "BP", keyType = "ENSEMBL",
minGSSize = 3, maxGSSize = 800, pvalueCutoff = 0.1,
OrgDb = org.Hs.eg.db, pAdjustMethod = "none"),
error = function(e) { cat("MM enrichGO skipped:", e$message, "\n"); NULL }
)
# ── MM GSEA ───────────────────────────────────────────────────────────────────
gse_mm <- tryCatch(
gseGO(geneList = gene_list_mm, ont = "BP", keyType = "ENSEMBL",
minGSSize = 3, maxGSSize = 800, pvalueCutoff = 1,
verbose = TRUE, OrgDb = org.Hs.eg.db, pAdjustMethod = "none"),
error = function(e) { cat("MM gseGO skipped:", e$message, "\n"); NULL }
)
gse_mm_pairwise <- if (!is.null(gse_mm)) pairwise_termsim(gse_mm) else NULL## #
## # over-representation test
## #
## #...@organism Homo sapiens
## #...@ontology BP
## #...@keytype ENSEMBL
## #...@gene chr [1:14] "ENSG00000260914" "ENSG00000289685" "ENSG00000103426" ...
## #...pvalues adjusted by 'none' with cutoff <0.1
## #...515 enriched terms found
## 'data.frame': 515 obs. of 12 variables:
## $ ID : chr "GO:0006631" "GO:0044283" "GO:0032787" "GO:0097305" ...
## $ Description : chr "fatty acid metabolic process" "small molecule biosynthetic process" "monocarboxylic acid metabolic process" "response to alcohol" ...
## $ GeneRatio : chr "4/9" "4/9" "4/9" "3/9" ...
## $ BgRatio : chr "456/21338" "689/21338" "691/21338" "311/21338" ...
## $ RichFactor : num 0.00877 0.00581 0.00579 0.00965 0.0303 ...
## $ FoldEnrichment: num 20.8 13.8 13.7 22.9 71.8 ...
## $ zScore : num 8.78 7 6.98 7.98 11.84 ...
## $ pvalue : num 2.38e-05 1.19e-04 1.21e-04 2.41e-04 3.34e-04 ...
## $ p.adjust : num 2.38e-05 1.19e-04 1.21e-04 2.41e-04 3.34e-04 ...
## $ qvalue : num 0.00975 0.01647 0.01647 0.0209 0.0209 ...
## $ geneID : chr "ENSG00000174697/ENSG00000170522/ENSG00000099998/ENSG00000012048" "ENSG00000174697/ENSG00000170522/ENSG00000099998/ENSG00000012048" "ENSG00000174697/ENSG00000170522/ENSG00000099998/ENSG00000012048" "ENSG00000174697/ENSG00000164742/ENSG00000012048" ...
## $ Count : int 4 4 4 3 2 2 3 3 2 2 ...
## #...Citation
## S Xu, E Hu, Y Cai, Z Xie, X Luo, L Zhan, W Tang, Q Wang, B Liu, R Wang, W Xie, T Wu, L Xie, G Yu. Using clusterProfiler to characterize multiomics data. Nature Protocols. 2024, 19(11):3292-3320
## #
## # Gene Set Enrichment Analysis
## #
## #...@organism Homo sapiens
## #...@setType BP
## #...@keytype ENSEMBL
## #...@geneList Named num [1:14] 8.46 6.81 6.72 3.41 3.3 ...
## - attr(*, "names")= chr [1:14] "ENSG00000260914" "ENSG00000289685" "ENSG00000103426" "ENSG00000293495" ...
## #...nPerm
## #...pvalues adjusted by 'none' with cutoff <1
## #...95 enriched terms found
## 'data.frame': 95 obs. of 11 variables:
## $ ID : chr "GO:0008150" "GO:0009987" "GO:0008152" "GO:0044238" ...
## $ Description : chr "biological_process" "cellular process" "metabolic process" "primary metabolic process" ...
## $ setSize : int 9 9 7 7 6 6 8 8 3 3 ...
## $ enrichmentScore: num -0.8 -0.8 -0.714 -0.714 -0.75 ...
## $ NES : num -1.78 -1.78 -1.5 -1.5 -1.5 ...
## $ pvalue : num 0.0143 0.0143 0.043 0.043 0.0558 ...
## $ p.adjust : num 0.0143 0.0143 0.043 0.043 0.0558 ...
## $ qvalue : num 0.558 0.558 0.558 0.558 0.558 ...
## $ rank : num 11 11 10 10 9 9 11 11 5 5 ...
## $ leading_edge : chr "tags=100%, list=79%, signal=60%" "tags=100%, list=79%, signal=60%" "tags=100%, list=71%, signal=57%" "tags=100%, list=71%, signal=57%" ...
## $ core_enrichment: chr "ENSG00000177084/ENSG00000174697/ENSG00000159307/ENSG00000170522/ENSG00000099998/ENSG00000164742/ENSG00000012048"| __truncated__ "ENSG00000177084/ENSG00000174697/ENSG00000159307/ENSG00000170522/ENSG00000099998/ENSG00000164742/ENSG00000012048"| __truncated__ "ENSG00000174697/ENSG00000170522/ENSG00000099998/ENSG00000164742/ENSG00000012048/ENSG00000132207" "ENSG00000174697/ENSG00000170522/ENSG00000099998/ENSG00000164742/ENSG00000012048/ENSG00000132207" ...
## #...Citation
## S Xu, E Hu, Y Cai, Z Xie, X Luo, L Zhan, W Tang, Q Wang, B Liu, R Wang, W Xie, T Wu, L Xie, G Yu. Using clusterProfiler to characterize multiomics data. Nature Protocols. 2024, 19(11):3292-3320
## #
## # over-representation test
## #
## #...@organism Homo sapiens
## #...@ontology BP
## #...@keytype ENSEMBL
## #...@gene chr [1:11] "ENSG00000184557" "ENSG00000224389" "ENSG00000138496" ...
## #...pvalues adjusted by 'none' with cutoff <0.1
## #...547 enriched terms found
## 'data.frame': 547 obs. of 12 variables:
## $ ID : chr "GO:0072539" "GO:0072538" "GO:0042093" "GO:0002294" ...
## $ Description : chr "T-helper 17 cell differentiation" "T-helper 17 type immune response" "T-helper cell differentiation" "CD4-positive, alpha-beta T cell differentiation involved in immune response" ...
## $ GeneRatio : chr "3/11" "3/11" "3/11" "3/11" ...
## $ BgRatio : chr "47/21338" "64/21338" "108/21338" "110/21338" ...
## $ RichFactor : num 0.0638 0.0469 0.0278 0.0273 0.027 ...
## $ FoldEnrichment: num 123.8 90.9 53.9 52.9 52.4 ...
## $ zScore : num 19.1 16.4 12.5 12.4 12.3 ...
## $ pvalue : num 1.63e-06 4.17e-06 2.02e-05 2.13e-05 2.19e-05 ...
## $ p.adjust : num 1.63e-06 4.17e-06 2.02e-05 2.13e-05 2.19e-05 ...
## $ qvalue : num 0.000574 0.000734 0.001285 0.001285 0.001285 ...
## $ geneID : chr "ENSG00000184557/ENSG00000171223/ENSG00000168610" "ENSG00000184557/ENSG00000171223/ENSG00000168610" "ENSG00000184557/ENSG00000171223/ENSG00000168610" "ENSG00000184557/ENSG00000171223/ENSG00000168610" ...
## $ Count : int 3 3 3 3 3 3 3 3 2 2 ...
## #...Citation
## S Xu, E Hu, Y Cai, Z Xie, X Luo, L Zhan, W Tang, Q Wang, B Liu, R Wang, W Xie, T Wu, L Xie, G Yu. Using clusterProfiler to characterize multiomics data. Nature Protocols. 2024, 19(11):3292-3320
## #
## # Gene Set Enrichment Analysis
## #
## #...@organism Homo sapiens
## #...@setType BP
## #...@keytype ENSEMBL
## #...@geneList Named num [1:11] 3.98 3.44 2.97 2.33 2.12 ...
## - attr(*, "names")= chr [1:11] "ENSG00000184557" "ENSG00000224389" "ENSG00000138496" "ENSG00000187323" ...
## #...nPerm
## #...pvalues adjusted by 'none' with cutoff <1
## #...194 enriched terms found
## 'data.frame': 194 obs. of 11 variables:
## $ ID : chr "GO:0006954" "GO:0035239" "GO:0035295" "GO:0006955" ...
## $ Description : chr "inflammatory response" "tube morphogenesis" "tube development" "immune response" ...
## $ setSize : int 3 4 4 5 5 5 4 4 4 3 ...
## $ enrichmentScore: num 0.879 -1 -1 0.827 0.827 ...
## $ NES : num 1.61 -2.21 -2.21 1.63 1.63 ...
## $ pvalue : num 0.0112 0.0118 0.0118 0.0302 0.0302 ...
## $ p.adjust : num 0.0112 0.0118 0.0118 0.0302 0.0302 ...
## $ qvalue : num 0.708 0.708 0.708 0.708 0.708 ...
## $ rank : num 2 5 5 3 3 3 2 2 2 5 ...
## $ leading_edge : chr "tags=67%, list=18%, signal=75%" "tags=100%, list=45%, signal=86%" "tags=100%, list=45%, signal=86%" "tags=60%, list=27%, signal=80%" ...
## $ core_enrichment: chr "ENSG00000184557/ENSG00000224389" "ENSG00000171223/ENSG00000168610/ENSG00000100628" "ENSG00000171223/ENSG00000168610/ENSG00000100628" "ENSG00000184557/ENSG00000224389/ENSG00000138496" ...
## #...Citation
## S Xu, E Hu, Y Cai, Z Xie, X Luo, L Zhan, W Tang, Q Wang, B Liu, R Wang, W Xie, T Wu, L Xie, G Yu. Using clusterProfiler to characterize multiomics data. Nature Protocols. 2024, 19(11):3292-3320
With 14 DE genes in BMAT and 11 in MM, both lists fall below the recommended minimum of 50 genes for reliable ORA. Results are hypothesis-generating only, not statistically rigorous conclusions.
MM shows biologically coherent signals — immune regulation GO terms, T cell differentiation, and regulation of defense response — consistent with the multiple myeloma co-culture biology. Key DE genes like SOCS3, TSC22D3, and DDIT4 are established players in immune signaling and cancer pathways, lending biological face validity to the results. BMAT results are noisier given fewer DE genes, with DNA replication terms likely reflecting small-sample overlap noise rather than a true biological signal.
if (!is.null(gse_bmat_enrich)) {
print(dotplot(gse_bmat_enrich, showCategory = 10) +
ggtitle("BMAT: GO BP Over-Representation"))
} else { cat("BMAT enrichGO: no results to plot\n") }if (!is.null(gse_mm)) {
print(dotplot(gse_mm, showCategory = 10) +
ggtitle("MM: GO BP GSEA"))
} else { cat("MM gseGO: no results to plot\n") }if (!is.null(gse_bmat_pairwise)) {
print(emapplot(gse_bmat_pairwise, showCategory = 10))
} else { cat("BMAT emapplot: no results to plot\n") }if (!is.null(gse_mm_pairwise)) {
print(emapplot(gse_mm_pairwise, showCategory = 10))
} else { cat("MM emapplot: no results to plot\n") }if (!is.null(gse_bmat)) {
print(ridgeplot(gse_bmat) +
labs(x = "Enrichment distribution", title = "BMAT Ridgeplot") +
theme(plot.margin = margin(10, 10, 10, 10)))
} else { cat("BMAT ridgeplot: no results to plot\n") }if (!is.null(gse_mm)) {
print(ridgeplot(gse_mm) +
labs(x = "Enrichment distribution", title = "MM Ridgeplot") +
theme(plot.margin = margin(10, 10, 10, 10)))
} else { cat("MM ridgeplot: no results to plot\n") }gse_bmat_do <- tryCatch(
gseDO(gene_list_bmat_entrez, pvalueCutoff = 1),
error = function(e) { cat("BMAT gseDO skipped:", e$message, "\n"); NULL }
)## BMAT gseDO skipped: NAs in names(stats) are not allowed
gse_mm_do <- tryCatch(
gseDO(gene_list_mm_entrez[!duplicated(names(gene_list_mm_entrez))],
pvalueCutoff = 1),
error = function(e) { cat("MM gseDO skipped:", e$message, "\n"); NULL }
)if (!is.null(gse_mm_do) && nrow(gse_mm_do) > 0) {
print(dotplot(gse_mm_do) + ggtitle("MM: Disease Ontology Enrichment"))
} else { cat("MM DO dotplot: no results to plot\n") }## MM DO dotplot: no results to plot
The MM Disease Ontology analysis returned no results to plot. This is because our 11-gene MM list is too small to achieve statistically meaningful overlap with Disease Ontology terms even at a fully relaxed pvalueCutoff of 1. The BMAT gseDO was similarly skipped due to NAs in the Entrez ID mapping. While disappointing, this absence is itself informative — it reflects the fundamental power limitation of this pilot dataset. It is not a failure of the biology but a consequence of sample size. The GO and KEGG results already point toward immune and metabolic associations consistent with multiple myeloma biology. In a well-powered study, we would expect Disease Ontology analysis to reveal enrichment for hematologic malignancy, immune dysregulation, and bone marrow disease terms directly relevant to the Reagan lab’s research questions. —
kk_bmat <- tryCatch(
enrichKEGG(names(gene_list_bmat_entrez), organism = "hsa",
minGSSize = 3, maxGSSize = 800,
pvalueCutoff = 1, pAdjustMethod = "BH", keyType = "kegg"),
error = function(e) { cat("BMAT KEGG skipped:", e$message, "\n"); NULL }
)
kk_mm <- tryCatch(
enrichKEGG(names(gene_list_mm_entrez), organism = "hsa",
minGSSize = 3, maxGSSize = 800,
pvalueCutoff = 1, pAdjustMethod = "BH", keyType = "kegg"),
error = function(e) { cat("MM KEGG skipped:", e$message, "\n"); NULL }
)
kk_bmat## #
## # over-representation test
## #
## #...@organism hsa
## #...@ontology KEGG
## #...@keytype kegg
## #...@gene chr [1:13] NA "100529144" "84798" "3683" "5426" "3952" "80274" "79071" ...
## #...pvalues adjusted by 'BH' with cutoff <1
## #...75 enriched terms found
## 'data.frame': 75 obs. of 14 variables:
## $ category : chr "Genetic Information Processing" "Metabolism" "Environmental Information Processing" "Environmental Information Processing" ...
## $ subcategory : chr "Replication and repair" "Metabolism of other amino acids" "Signal transduction" "Signaling molecules and interaction" ...
## $ ID : chr "hsa03460" "hsa00430" "hsa04015" "hsa04081" ...
## $ Description : chr "Fanconi anemia pathway" "Taurine and hypotaurine metabolism" "Rap1 signaling pathway" "Hormone signaling" ...
## $ GeneRatio : chr "2/9" "1/9" "2/9" "2/9" ...
## $ BgRatio : chr "54/9370" "17/9370" "212/9370" "219/9370" ...
## $ RichFactor : num 0.03704 0.05882 0.00943 0.00913 0.00893 ...
## $ FoldEnrichment: num 38.56 61.24 9.82 9.51 9.3 ...
## $ zScore : num 8.58 7.71 4.03 3.95 3.9 ...
## $ pvalue : num 0.00114 0.01622 0.01652 0.01757 0.01834 ...
## $ p.adjust : num 0.101 0.183 0.183 0.183 0.183 ...
## $ qvalue : num 0.0915 0.166 0.166 0.166 0.166 ...
## $ geneID : chr "672/79008" "2687" "3683/107" "3952/107" ...
## $ Count : int 2 1 2 2 2 1 1 1 1 1 ...
## #...Citation
## S Xu, E Hu, Y Cai, Z Xie, X Luo, L Zhan, W Tang, Q Wang, B Liu, R Wang, W Xie, T Wu, L Xie, G Yu. Using clusterProfiler to characterize multiomics data. Nature Protocols. 2024, 19(11):3292-3320
## #
## # over-representation test
## #
## #...@organism hsa
## #...@ontology KEGG
## #...@keytype kegg
## #...@gene chr [1:11] "9021" "721" "83666" "1630" "1831" "54541" "8553" "57674" ...
## #...pvalues adjusted by 'BH' with cutoff <1
## #...53 enriched terms found
## 'data.frame': 53 obs. of 14 variables:
## $ category : chr "Organismal Systems" "Organismal Systems" "Organismal Systems" "Human Diseases" ...
## $ subcategory : chr "Endocrine system" "Endocrine system" "Endocrine system" "Endocrine and metabolic disease" ...
## $ ID : chr "hsa04935" "hsa04920" "hsa04917" "hsa04931" ...
## $ Description : chr "Growth hormone synthesis, secretion and action" "Adipocytokine signaling pathway" "Prolactin signaling pathway" "Insulin resistance" ...
## $ GeneRatio : chr "3/7" "2/7" "2/7" "2/7" ...
## $ BgRatio : chr "122/9370" "70/9370" "71/9370" "109/9370" ...
## $ RichFactor : num 0.0246 0.0286 0.0282 0.0183 0.0168 ...
## $ FoldEnrichment: num 32.9 38.2 37.7 24.6 22.5 ...
## $ zScore : num 9.7 8.55 8.49 6.76 6.45 ...
## $ pvalue : num 7.26e-05 1.13e-03 1.16e-03 2.71e-03 3.22e-03 ...
## $ p.adjust : num 0.00385 0.02049 0.02049 0.03415 0.03415 ...
## $ qvalue : num 0.00298 0.01587 0.01587 0.02645 0.02645 ...
## $ geneID : chr "9021/3726/6774" "9021/6774" "9021/6774" "9021/6774" ...
## $ Count : int 3 2 2 2 2 2 2 2 2 2 ...
## #...Citation
## S Xu, E Hu, Y Cai, Z Xie, X Luo, L Zhan, W Tang, Q Wang, B Liu, R Wang, W Xie, T Wu, L Xie, G Yu. Using clusterProfiler to characterize multiomics data. Nature Protocols. 2024, 19(11):3292-3320
if (!is.null(kk_bmat) && nrow(kk_bmat) > 0) {
print(dotplot(kk_bmat, showCategory = 10) + ggtitle("BMAT: KEGG Pathways"))
} else { cat("BMAT KEGG dotplot: no results to plot\n") }if (!is.null(kk_mm) && nrow(kk_mm) > 0) {
print(dotplot(kk_mm, showCategory = 10) + ggtitle("MM: KEGG Pathways"))
} else { cat("MM KEGG dotplot: no results to plot\n") }