Installation and Setup

if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager", ask = FALSE)

install_if_missing <- function(package_name, is_bioc = FALSE, ask_install = TRUE) {
    if (!requireNamespace(package_name, quietly = TRUE)) {
        if (is_bioc) {
            BiocManager::install(package_name, update = FALSE, ask = ask_install)
        } else {
            install.packages(package_name, ask = ask_install)
        }
    }
}

cran_packages <- c("tidyverse", "RColorBrewer", "pheatmap", "ggrepel", "cowplot")
bioc_packages <- c("DESeq2", "tximport", "biomaRt", "DEGreport", "apeglm", "org.Hs.eg.db")

invisible(lapply(cran_packages, install_if_missing, is_bioc = FALSE, ask_install = FALSE))
invisible(lapply(bioc_packages, install_if_missing, is_bioc = TRUE, ask_install = FALSE))

suppressPackageStartupMessages({
    library(apeglm)
    library(DESeq2)
    library(biomaRt)
    library(cowplot)
    library(ggrepel)
    library(tximport)
    library(pheatmap)
    library(DEGreport)
    library(tidyverse)
    library(RColorBrewer)
    library(org.Hs.eg.db)
})

print("Installation and loading step is complete.")

## [1] "Installation and loading step is complete."

Annotation Gathering

Connect to BioMart

# May need to retry if network times out
human_mart <- useEnsembl(biomart = "ensembl",
                         dataset  = "hsapiens_gene_ensembl",
                         mirror   = "useast")

Retrieve Transcript-to-Gene Mapping

transcript_to_gene <- biomaRt::getBM(
    attributes = c("ensembl_transcript_id", "transcript_version",
                   "ensembl_gene_id", "description", "external_gene_name"),
    mart = human_mart
)

# Combine transcript ID and version to match Salmon format (e.g. ENST00000456328.2)
transcript_to_gene$ensembl_transcript_id <- paste(
    transcript_to_gene$ensembl_transcript_id,
    transcript_to_gene$transcript_version,
    sep = "."
)

Wrangle Final t2g Annotation Table

t2g <- dplyr::rename(
    transcript_to_gene,
    target_id = ensembl_transcript_id,
    ens_gene  = ensembl_gene_id,
    ext_gene  = external_gene_name
)[, c("target_id", "ens_gene", "description", "ext_gene")]

knitr::kable(head(t2g), caption = "Transcript-to-gene annotation table (first 6 rows)")

Transcript-to-gene annotation table (first 6 rows)
target_id	ens_gene	description	ext_gene
ENST00000387314.1	ENSG00000210049	mitochondrially encoded tRNA-Phe (UUU/C) [Source:HGNC Symbol;Acc:HGNC:7481]	MT-TF
ENST00000389680.2	ENSG00000211459	mitochondrially encoded 12S rRNA [Source:HGNC Symbol;Acc:HGNC:7470]	MT-RNR1
ENST00000387342.1	ENSG00000210077	mitochondrially encoded tRNA-Val (GUN) [Source:HGNC Symbol;Acc:HGNC:7500]	MT-TV
ENST00000387347.2	ENSG00000210082	mitochondrially encoded 16S rRNA [Source:HGNC Symbol;Acc:HGNC:7471]	MT-RNR2
ENST00000386347.1	ENSG00000209082	mitochondrially encoded tRNA-Leu (UUA/G) 1 [Source:HGNC Symbol;Acc:HGNC:7490]	MT-TL1
ENST00000361390.2	ENSG00000198888	mitochondrially encoded NADH:ubiquinone oxidoreductase core subunit 1 [Source:HGNC Symbol;Acc:HGNC:7455]	MT-ND1

Experimental Design

Build s2c Design Matrix

salmon_dir <- "/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon"

samples      <- list.files(salmon_dir, full.names = TRUE, pattern = ".*_S[0-9]+.*")
salmon_files <- file.path(samples, "quant.sf")
names(salmon_files) <- samples

s2c <- data.frame(
  sample      = sub(".*/", "", samples),
  co_cultured = c(FALSE, TRUE,   # 050A alone,  050M co-cultured
                  FALSE, TRUE,   # 192A alone,  192R co-cultured
                  FALSE, TRUE,   # 247A alone,  247O co-cultured
                  FALSE, TRUE,   # M1SA alone,  M1SB co-cultured
                  FALSE, TRUE,   # OPMA alone,  OPMB co-cultured
                  FALSE, TRUE),  # RPMA alone,  RPMB co-cultured
  replicate   = as.factor(c(1,1, 2,2, 3,3, 4,4, 5,5, 6,6)),
  stringsAsFactors = FALSE
)

s2c$co_cultured <- relevel(as.factor(as.logical(s2c$co_cultured)), ref = "FALSE")
s2c <- dplyr::mutate(s2c, path = samples)

# Split into BMAT (bone marrow adipocyte) and MM (multiple myeloma) subsets
s2c_bmat <- s2c[1:6, ]
s2c_mm   <- s2c[7:12, ]

knitr::kable(s2c, caption = "Full experimental design matrix")

Full experimental design matrix
sample	co_cultured	replicate	path
050A_D708_D502_S11	FALSE	1	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/050A_D708_D502_S11
050M_D709_D502_S12	TRUE	1	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/050M_D709_D502_S12
192A_D704_D501_S7	FALSE	2	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/192A_D704_D501_S7
192R_D705_D501_S8	TRUE	2	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/192R_D705_D501_S8
247A_D706_D501_S9	FALSE	3	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/247A_D706_D501_S9
247O_D707_D501_S10	TRUE	3	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/247O_D707_D501_S10
M1SA_D711_D504_S17	FALSE	4	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/M1SA_D711_D504_S17
M1SB_D712_D504_S18	TRUE	4	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/M1SB_D712_D504_S18
OPMA_D701_D503_S13	FALSE	5	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/OPMA_D701_D503_S13
OPMB_D710_D504_S16	TRUE	5	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/OPMB_D710_D504_S16
RPMA_D702_D503_S14	FALSE	6	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/RPMA_D702_D503_S14
RPMB_D703_D503_S15	TRUE	6	/courses/BINF6430.202630/students/tran.nhi2/STAR_salmon/RPMB_D703_D503_S15

Define Design Formula

design_formula <- ~replicate + co_cultured

Parsing Salmon Files with tximport

Glob File Paths

files_bmat <- salmon_files[1:6]
files_mm   <- salmon_files[7:12]

Import BMAT Quantifications

print("Reading in BMAT cell quantification data.")

## [1] "Reading in BMAT cell quantification data."

txi_bmat <- tximport(
    files_bmat,
    type                = "salmon",
    tx2gene             = transcript_to_gene[, c("ensembl_transcript_id", "ensembl_gene_id")],
    countsFromAbundance = "lengthScaledTPM"
)
print("Done with BMAT.")

## [1] "Done with BMAT."

Import MM Quantifications

print("Reading in MM cell quantification data.")

## [1] "Reading in MM cell quantification data."

txi_mm <- tximport(
    files_mm,
    type                = "salmon",
    tx2gene             = transcript_to_gene[, c("ensembl_transcript_id", "ensembl_gene_id")],
    countsFromAbundance = "lengthScaledTPM"
)
print("Done with MM.")

## [1] "Done with MM."

Wrangle Count Data

# BMAT
data_bmat <- txi_bmat$counts %>%
    round() %>%
    data.frame()
colnames(data_bmat) <- colnames(txi_bmat$counts)

# MM
data_mm <- txi_mm$counts %>%
    round() %>%
    data.frame()
colnames(data_mm) <- colnames(txi_mm$counts)

Differential Expression with DESeq2

Clean Column Names and Format s2c

colnames(data_bmat) <- str_replace(names(files_bmat),
                                   paste0(salmon_dir, "/"), "") %>%
                       str_replace(".salmon", "")

colnames(data_mm)   <- str_replace(names(files_mm),
                                   paste0(salmon_dir, "/"), "") %>%
                       str_replace(".salmon", "")

rownames(s2c_bmat) <- s2c_bmat$sample
s2c_bmat <- s2c_bmat[, c("co_cultured", "replicate")]

rownames(s2c_mm) <- s2c_mm$sample
s2c_mm <- s2c_mm[, c("co_cultured", "replicate")]

Fit DESeq2 Models

# BMAT
dds_bmat <- DESeqDataSetFromMatrix(
    countData = data_bmat,
    colData   = s2c_bmat,
    design    = design_formula
)
dds_bmat <- DESeq(dds_bmat)

# MM
dds_mm <- DESeqDataSetFromMatrix(
    countData = data_mm,
    colData   = s2c_mm,
    design    = design_formula
)
dds_mm <- DESeq(dds_mm)

Mean-Variance Plots

# BMAT
mean_counts <- apply(data_bmat[, c(2, 4, 6)], 1, mean)
variance_counts <- apply(data_bmat[, c(2, 4, 6)], 1, var)
df <- data.frame(mean_counts, variance_counts)
ggplot(df) +
    geom_point(aes(x = mean_counts, y = variance_counts)) +
    geom_line(aes(x = mean_counts, y = mean_counts, color = "red")) +
    scale_y_log10() + scale_x_log10() +
    ggtitle("BMAT: Mean vs Variance") +
    theme(legend.position = "none")

# MM
mean_counts <- apply(data_mm[, c(2, 4, 6)], 1, mean)
variance_counts <- apply(data_mm[, c(2, 4, 6)], 1, var)
df <- data.frame(mean_counts, variance_counts)
ggplot(df) +
    geom_point(aes(x = mean_counts, y = variance_counts)) +
    geom_line(aes(x = mean_counts, y = mean_counts, color = "red")) +
    scale_y_log10() + scale_x_log10() +
    ggtitle("MM: Mean vs Variance") +
    theme(legend.position = "none")

Dispersion Estimates

plotDispEsts(dds_bmat, main = "BMAT Dispersion Estimates")

plotDispEsts(dds_mm,   main = "MM Dispersion Estimates")

PCA and Batch Correction

# BMAT - before batch correction
vsd_bmat <- vst(dds_bmat, blind = FALSE)
pca_bmat <- plotPCA(vsd_bmat, intgroup = c("co_cultured", "replicate"), returnData = TRUE)
percentVar <- round(100 * attr(pca_bmat, "percentVar"))

pca_bmat_before <- ggplot(pca_bmat, aes(PC1, PC2, color = co_cultured)) +
    geom_point(size = 2) +
    xlim(-10, 10) + ylim(-10, 10) +
    xlab(paste0("PC1: ", percentVar[1], "% variance")) +
    ylab(paste0("PC2: ", percentVar[2], "% variance")) +
    coord_fixed() +
    ggtitle("BMAT Before Batch Correction")

# BMAT - batch correction
assay(vsd_bmat) <- limma::removeBatchEffect(assay(vsd_bmat), vsd_bmat$replicate)
pca_bmat <- plotPCA(vsd_bmat, intgroup = c("co_cultured", "replicate"), returnData = TRUE)
percentVar <- round(100 * attr(pca_bmat, "percentVar"))

pca_bmat_after <- ggplot(pca_bmat, aes(PC1, PC2, color = co_cultured)) +
    geom_point(size = 2) +
    xlim(-5, 5) + ylim(-5, 5) +
    xlab(paste0("PC1: ", percentVar[1], "% variance")) +
    ylab(paste0("PC2: ", percentVar[2], "% variance")) +
    coord_fixed() +
    ggtitle("BMAT After Batch Correction")

title <- ggdraw() + draw_label("BMAT alone vs co_cultured", fontface = "bold", x = 0, hjust = 0)
plot_grid(title, pca_bmat_before, pca_bmat_after, ncol = 1, rel_heights = c(0.1, 1, 1))

# MM - before batch correction
vsd_mm <- vst(dds_mm, blind = FALSE)
pca_mm <- plotPCA(vsd_mm, intgroup = c("co_cultured", "replicate"), returnData = TRUE)
percentVar <- round(100 * attr(pca_mm, "percentVar"))

pcamm1 <- ggplot(pca_mm, aes(PC1, PC2, color = co_cultured)) +
    geom_point(size = 2) +
    xlim(-20, 20) + ylim(-20, 20) +
    xlab(paste0("PC1: ", percentVar[1], "% variance")) +
    ylab(paste0("PC2: ", percentVar[2], "% variance")) +
    coord_fixed() +
    ggtitle("MM Before Batch Correction")

# MM - batch correction
assay(vsd_mm) <- limma::removeBatchEffect(assay(vsd_mm), vsd_mm$replicate)
pca_mm <- plotPCA(vsd_mm, intgroup = c("co_cultured", "replicate"), returnData = TRUE)
percentVar <- round(100 * attr(pca_mm, "percentVar"))

pcamm2 <- ggplot(pca_mm, aes(PC1, PC2, color = co_cultured)) +
    geom_point(size = 2) +
    xlim(-10, 10) + ylim(-10, 10) +
    xlab(paste0("PC1: ", percentVar[1], "% variance")) +
    ylab(paste0("PC2: ", percentVar[2], "% variance")) +
    coord_fixed() +
    ggtitle("MM After Batch Correction")

title <- ggdraw() + draw_label("MM alone vs co_cultured", fontface = "bold", x = 0, hjust = 0)
plot_grid(title, pcamm1, pcamm2, ncol = 1, rel_heights = c(0.1, 1, 1))

LFC Shrinkage

contrast <- c("co_cultured", "TRUE", "FALSE")

# BMAT
res_bmat        <- results(dds_bmat, contrast = contrast, alpha = 0.1)
res_bmat_before <- res_bmat
res_bmat        <- lfcShrink(dds_bmat, res = res_bmat,
                              coef = "co_cultured_TRUE_vs_FALSE", type = "apeglm")

plotMA(res_bmat_before, ylim = c(-2, 2), cex = 0.8, main = "BMAT Before Shrinkage")
abline(h = c(-1, 1), col = "red", lwd = 2)

plotMA(res_bmat, ylim = c(-2, 2), cex = 0.8, main = "BMAT After Shrinkage")
abline(h = c(-1, 1), col = "red", lwd = 2)

# MM
res_mm        <- results(dds_mm, contrast = contrast, alpha = 0.1)
res_mm_before <- res_mm
res_mm        <- lfcShrink(dds_mm, res = res_mm,
                            coef = "co_cultured_TRUE_vs_FALSE", type = "apeglm")

plotMA(res_mm_before, ylim = c(-2, 2), cex = 0.8, main = "MM Before Shrinkage")
abline(h = c(-1, 1), col = "red", lwd = 2)

plotMA(res_mm, ylim = c(-2, 2), cex = 0.8, main = "MM After Shrinkage")
abline(h = c(-1, 1), col = "red", lwd = 2)

DE Results Summary

# BMAT
summary(res_bmat, alpha = 0.1)

## 
## out of 23567 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 40, 0.17%
## LFC < 0 (down)     : 10, 0.042%
## outliers [1]       : 0, 0%
## low counts [2]     : 5891, 25%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

res_bmat_tb <- res_bmat %>%
    data.frame() %>%
    rownames_to_column(var = "gene") %>%
    as_tibble()

# MM
summary(res_mm, alpha = 0.1)

## 
## out of 22474 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 29, 0.13%
## LFC < 0 (down)     : 38, 0.17%
## outliers [1]       : 0, 0%
## low counts [2]     : 16337, 73%
## (mean count < 196)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

res_mm_tb <- res_mm %>%
    data.frame() %>%
    rownames_to_column(var = "gene") %>%
    as_tibble()

Full DE Gene Lists

# BMAT
DE_bmat <- res_bmat_tb %>%
    filter(padj < 0.1 & abs(log2FoldChange) >= 1)

DE_bmat$symbol <- mapIds(org.Hs.eg.db, keys = DE_bmat$gene,
                         column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first")
DE_bmat$entrez <- mapIds(org.Hs.eg.db, keys = DE_bmat$gene,
                         column = "ENTREZID", keytype = "ENSEMBL", multiVals = "first")

write.csv(DE_bmat, "DE_BMAT.csv", row.names = TRUE)
knitr::kable(head(DE_bmat), caption = "Top DE genes in BMAT")

Top DE genes in BMAT
gene	baseMean	log2FoldChange	lfcSE	pvalue	padj	symbol	entrez
ENSG00000005844	21.44944	3.295877	1.9260744	0.0002595	0.0975866	ITGAL	3683
ENSG00000012048	150.28148	-1.051429	0.3515882	0.0000878	0.0480040	BRCA1	672
ENSG00000099998	345.31411	1.067162	0.2602939	0.0000014	0.0023955	GGT5	2687
ENSG00000103426	10.80147	6.717144	2.9615569	0.0000630	0.0397916	CORO7-PAM16	100529144
ENSG00000132207	25.14079	-4.862172	1.2520456	0.0000028	0.0041134	SLX1B	79008
ENSG00000159307	250.30474	1.163067	0.2696740	0.0000007	0.0014518	SCUBE1	80274

# MM
DE_mm <- res_mm_tb %>%
    filter(padj < 0.1 & abs(log2FoldChange) >= 1)

DE_mm$symbol <- mapIds(org.Hs.eg.db, keys = DE_mm$gene,
                       column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first")
DE_mm$entrez <- mapIds(org.Hs.eg.db, keys = DE_mm$gene,
                       column = "ENTREZID", keytype = "ENSEMBL", multiVals = "first")

write.csv(DE_mm, "DE_MM.csv", row.names = TRUE)
knitr::kable(head(DE_mm), caption = "Top DE genes in MM")

Top DE genes in MM
gene	baseMean	log2FoldChange	lfcSE	pvalue	padj	symbol	entrez
ENSG00000100628	253.2037	-1.403301	0.4000929	1.1e-05	0.0037581	ASB2	51676
ENSG00000134107	437.9017	1.385086	0.3069167	2.0e-07	0.0001682	BHLHE40	8553
ENSG00000138496	260.1226	2.973312	0.6607364	0.0e+00	0.0000230	PARP9	83666
ENSG00000157514	1170.0315	2.115485	0.3442793	0.0e+00	0.0000000	TSC22D3	1831
ENSG00000168209	1212.6055	1.962096	0.2663245	0.0e+00	0.0000000	DDIT4	54541
ENSG00000168610	686.1559	1.020562	0.2283592	3.0e-07	0.0002136	STAT3	6774

Sanity Checks: Sample Correlation Heatmaps

# BMAT
vst_mat_bmat <- assay(vsd_bmat)
vst_cor_bmat <- cor(vst_mat_bmat)
bmat_annotation_heatmap <- data.frame(row.names = colnames(vst_mat_bmat),
                                       s2c_bmat$co_cultured)
pheatmap(vst_cor_bmat,
         annotation_col = bmat_annotation_heatmap,
         main           = "BMAT sample correlation",
         show_rownames  = FALSE,
         show_colnames  = FALSE)

# MM
vst_mat_mm <- assay(vsd_mm)
vst_cor_mm <- cor(vst_mat_mm)
mm_annotation_heatmap <- data.frame(row.names = colnames(vst_mat_mm),
                                     s2c_mm$co_cultured)
pheatmap(vst_cor_mm,
         annotation_col = mm_annotation_heatmap,
         main           = "MM sample correlation",
         show_rownames  = FALSE,
         show_colnames  = FALSE)

Expression Heatmaps of DE Genes

# Shared annotation table
grch38annot <- transcript_to_gene %>%
    dplyr::select(ensembl_gene_id, external_gene_name) %>%
    dplyr::distinct()

heat_colors <- brewer.pal(6, "YlOrRd")

# BMAT
normalized_counts_bmat <- counts(dds_bmat, normalized = TRUE) %>%
    data.frame() %>%
    rownames_to_column(var = "gene")

names(normalized_counts_bmat) <- sub("X", "", names(normalized_counts_bmat))

normalized_counts_bmat <- merge(normalized_counts_bmat, grch38annot,
                                 by.x = "gene", by.y = "ensembl_gene_id") %>%
    as_tibble()

norm_sig_bmat <- normalized_counts_bmat %>%
    filter(gene %in% DE_bmat$gene)

pheatmap(norm_sig_bmat[, 2:7],
         color         = heat_colors,
         cluster_rows  = TRUE,
         show_rownames = FALSE,
         annotation_col = s2c_bmat,
         border_color  = NA,
         fontsize      = 10,
         scale         = "row",
         height        = 20,
         drop_levels   = TRUE,
         main          = "BMAT DE Genes")

# MM
normalized_counts_mm <- counts(dds_mm, normalized = TRUE) %>%
    data.frame() %>%
    rownames_to_column(var = "gene")

normalized_counts_mm <- merge(normalized_counts_mm, grch38annot,
                               by.x = "gene", by.y = "ensembl_gene_id") %>%
    as_tibble()

norm_sig_mm <- normalized_counts_mm %>%
    filter(gene %in% DE_mm$gene)

pheatmap(norm_sig_mm[, 2:7],
         color         = heat_colors,
         cluster_rows  = TRUE,
         show_rownames = FALSE,
         annotation    = s2c_mm,
         border_color  = NA,
         fontsize      = 10,
         scale         = "row",
         height        = 20,
         drop_levels   = TRUE,
         main          = "MM DE Genes")

Volcano Plots

# BMAT
res_bmat_tb <- na.omit(res_bmat_tb %>%
    mutate(threshold_OE = padj < 0.1 & abs(log2FoldChange) >= 1))

res_bmat_tb <- bind_cols(res_bmat_tb,
    symbol = grch38annot$external_gene_name[match(res_bmat_tb$gene,
                                                   grch38annot$ensembl_gene_id)])
res_bmat_tb <- res_bmat_tb %>%
    mutate(genelabels = "") %>%
    arrange(padj)

res_bmat_tb$genelabels[res_bmat_tb$threshold_OE == TRUE] <-
    as.character(res_bmat_tb$symbol[res_bmat_tb$threshold_OE == TRUE])

ggplot(res_bmat_tb, aes(x = log2FoldChange, y = -log10(padj))) +
    geom_point(aes(colour = threshold_OE)) +
    geom_text_repel(aes(label = genelabels)) +
    ggtitle("BMAT alone vs co_cultured") +
    xlab("log2 fold change") +
    ylab("-log10 adjusted p-value") +
    theme(legend.position  = "none",
          plot.title       = element_text(size = rel(1.5), hjust = 0.5),
          axis.title       = element_text(size = rel(1.25)))

# MM
res_mm_tb <- na.omit(res_mm_tb %>%
    mutate(threshold_OE = padj < 0.1 & abs(log2FoldChange) >= 1))

res_mm_tb <- bind_cols(res_mm_tb,
    symbol = grch38annot$external_gene_name[match(res_mm_tb$gene,
                                                   grch38annot$ensembl_gene_id)])
res_mm_tb <- res_mm_tb %>%
    mutate(genelabels = "") %>%
    arrange(padj)

res_mm_tb$genelabels[res_mm_tb$threshold_OE == TRUE] <-
    as.character(res_mm_tb$symbol[res_mm_tb$threshold_OE == TRUE])

ggplot(res_mm_tb, aes(x = log2FoldChange, y = -log10(padj))) +
    geom_point(aes(colour = threshold_OE)) +
    geom_text_repel(aes(label = genelabels)) +
    ggtitle("MM alone vs co_cultured") +
    xlab("log2 fold change") +
    ylab("-log10 adjusted p-value") +
    theme(legend.position  = "none",
          plot.title       = element_text(size = rel(1.5), hjust = 0.5),
          axis.title       = element_text(size = rel(1.25)))

DEGreport Visualizations

# BMAT
dds_bmat_deg <- dds_bmat
rowData(dds_bmat_deg)$symbol <- mapIds(org.Hs.eg.db, keys = rownames(dds_bmat_deg),
                                        column = "SYMBOL", keytype = "ENSEMBL",
                                        multiVals = "first")
rowData(dds_bmat_deg)$gene <- rownames(dds_bmat_deg)

degPlot(dds     = dds_bmat_deg,
        res     = DE_bmat,
        n       = nrow(DE_bmat),
        genes   = DE_bmat$gene,
        xs      = "co_cultured",
        group   = "co_cultured",
        ann     = c("gene", "symbol"),
        batch   = "replicate")

degVolcano(
    data.frame(res_bmat_tb[, c("log2FoldChange", "padj")]),
    plot_text = data.frame(res_bmat_tb[res_bmat_tb$threshold_OE == TRUE,
                                        c("log2FoldChange", "padj", "genelabels")])
)

# MM
dds_mm_deg <- dds_mm
rowData(dds_mm_deg)$symbol <- mapIds(org.Hs.eg.db, keys = rownames(dds_mm_deg),
                                      column = "SYMBOL", keytype = "ENSEMBL",
                                      multiVals = "first")
rowData(dds_mm_deg)$gene <- rownames(dds_mm_deg)

DE_mm_new <- DE_mm %>%
    group_by(symbol) %>%
    slice_min(padj, n = 1) %>%
    ungroup()

degPlot(dds     = dds_mm_deg,
        res     = DE_mm_new,
        n       = nrow(DE_mm_new),
        genes   = DE_mm_new$gene,
        xs      = "co_cultured",
        group   = "co_cultured",
        ann     = c("gene", "symbol"),
        batch   = "replicate")

degVolcano(
    data.frame(res_mm_tb[, c("log2FoldChange", "padj")]),
    plot_text = data.frame(res_mm_tb[res_mm_tb$threshold_OE == TRUE,
                                      c("log2FoldChange", "padj", "genelabels")])
)

Session Info

sessionInfo()

## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] org.Hs.eg.db_3.22.0         AnnotationDbi_1.72.0       
##  [3] RColorBrewer_1.1-3          lubridate_1.9.5            
##  [5] forcats_1.0.1               stringr_1.6.0              
##  [7] dplyr_1.2.0                 purrr_1.2.1                
##  [9] readr_2.2.0                 tidyr_1.3.2                
## [11] tibble_3.3.1                tidyverse_2.0.0            
## [13] DEGreport_1.46.0            pheatmap_1.0.13            
## [15] tximport_1.38.2             ggrepel_0.9.7              
## [17] ggplot2_4.0.2               cowplot_1.2.0              
## [19] biomaRt_2.66.1              DESeq2_1.50.2              
## [21] SummarizedExperiment_1.40.0 Biobase_2.70.0             
## [23] MatrixGenerics_1.22.0       matrixStats_1.5.0          
## [25] GenomicRanges_1.62.1        Seqinfo_1.0.0              
## [27] IRanges_2.44.0              S4Vectors_0.48.0           
## [29] BiocGenerics_0.56.0         generics_0.1.4             
## [31] apeglm_1.32.0               BiocManager_1.30.27        
## 
## loaded via a namespace (and not attached):
##   [1] ggdendro_0.2.0              rstudioapi_0.18.0          
##   [3] jsonlite_2.0.0              shape_1.4.6.1              
##   [5] magrittr_2.0.4              farver_2.1.2               
##   [7] rmarkdown_2.30              GlobalOptions_0.1.3        
##   [9] vctrs_0.7.1                 memoise_2.0.1              
##  [11] htmltools_0.5.9             S4Arrays_1.10.1            
##  [13] progress_1.2.3              curl_7.0.0                 
##  [15] broom_1.0.12                SparseArray_1.10.9         
##  [17] sass_0.4.10                 bslib_0.10.0               
##  [19] plyr_1.8.9                  httr2_1.2.2                
##  [21] cachem_1.1.0                lifecycle_1.0.5            
##  [23] iterators_1.0.14            pkgconfig_2.0.3            
##  [25] Matrix_1.7-4                R6_2.6.1                   
##  [27] fastmap_1.2.0               clue_0.3-67                
##  [29] digest_0.6.39               numDeriv_2016.8-1.1        
##  [31] colorspace_2.1-2            reshape_0.8.10             
##  [33] RSQLite_2.4.6               labeling_0.4.3             
##  [35] filelock_1.0.3              timechange_0.4.0           
##  [37] mgcv_1.9-3                  httr_1.4.8                 
##  [39] abind_1.4-8                 compiler_4.5.2             
##  [41] bit64_4.6.0-1               withr_3.0.2                
##  [43] doParallel_1.0.17           ConsensusClusterPlus_1.74.0
##  [45] S7_0.2.1                    backports_1.5.0            
##  [47] BiocParallel_1.44.0         DBI_1.3.0                  
##  [49] psych_2.6.1                 MASS_7.3-65                
##  [51] rappdirs_0.3.4              DelayedArray_0.36.0        
##  [53] rjson_0.2.23                tools_4.5.2                
##  [55] otel_0.2.0                  glue_1.8.0                 
##  [57] nlme_3.1-168                grid_4.5.2                 
##  [59] cluster_2.1.8.1             gtable_0.3.6               
##  [61] tzdb_0.5.0                  hms_1.1.4                  
##  [63] xml2_1.5.2                  XVector_0.50.0             
##  [65] foreach_1.5.2               pillar_1.11.1              
##  [67] vroom_1.7.0                 emdbook_1.3.14             
##  [69] limma_3.66.0                splines_4.5.2              
##  [71] logging_0.10-108            circlize_0.4.17            
##  [73] BiocFileCache_3.0.0         lattice_0.22-7             
##  [75] bit_4.6.0                   tidyselect_1.2.1           
##  [77] ComplexHeatmap_2.26.1       locfit_1.5-9.12            
##  [79] Biostrings_2.78.0           knitr_1.51                 
##  [81] edgeR_4.8.2                 xfun_0.56                  
##  [83] statmod_1.5.1               stringi_1.8.7              
##  [85] yaml_2.3.12                 evaluate_1.0.5             
##  [87] codetools_0.2-20            bbmle_1.0.25.1             
##  [89] cli_3.6.5                   jquerylib_0.1.4            
##  [91] Rcpp_1.1.1                  dbplyr_2.5.2               
##  [93] coda_0.19-4.1               png_0.1-8                  
##  [95] bdsmatrix_1.3-7             parallel_4.5.2             
##  [97] blob_1.3.0                  prettyunits_1.2.0          
##  [99] mvtnorm_1.3-3               scales_1.4.0               
## [101] crayon_1.5.3                GetoptLong_1.1.0           
## [103] rlang_1.1.7                 KEGGREST_1.50.0            
## [105] mnormt_2.1.2

Project 07: Quantification and Differential Expression using DESeq2

2026-03-09

Installation and Setup

Annotation Gathering

Connect to BioMart

Retrieve Transcript-to-Gene Mapping

Wrangle Final t2g Annotation Table

Experimental Design

Build s2c Design Matrix

Define Design Formula

Parsing Salmon Files with tximport

Glob File Paths

Import BMAT Quantifications

Import MM Quantifications

Wrangle Count Data

Differential Expression with DESeq2

Clean Column Names and Format s2c

Fit DESeq2 Models

Mean-Variance Plots

Dispersion Estimates

PCA and Batch Correction

LFC Shrinkage

DE Results Summary

Full DE Gene Lists

Sanity Checks: Sample Correlation Heatmaps

Expression Heatmaps of DE Genes

Volcano Plots

DEGreport Visualizations

Session Info