Differential expression analysis with DESeq2

# # Install the required packages if not already installed
# install.packages(c("pak"))
# 
# pak::pkg_install(c("BiocManager", "remotes", "here", "tidyverse",        
# "DESeq2", "pheatmap", "RColorBrewer", "ggrepel", "clusterProfiler",
# "enrichplot", "org.Mm.eg.db", "patchwork", "ComplexHeatmap"
# ))
# 
# # Install the course data package
# pak::pak("patterninstitute/OSD758")


# Load packages
library("here")            # package to find your current working directory

## here() starts at /home/students/student01/  rnaseq_counts2bio_course

library("tidyverse")       # packages for data manipulation and visualization

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library("DESeq2")          # differential expression analysis

## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## 
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## 
## Attaching package: 'S4Vectors'
## 
## The following objects are masked from 'package:lubridate':
## 
##     second, second<-
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## The following object is masked from 'package:utils':
## 
##     findMatches
## 
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## 
## The following object is masked from 'package:lubridate':
## 
##     %within%
## 
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## The following object is masked from 'package:purrr':
## 
##     reduce
## 
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## 
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## 
## Attaching package: 'MatrixGenerics'
## 
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## 
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## 
## Attaching package: 'Biobase'
## 
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## 
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians

library("pheatmap")        # heatmaps
library("RColorBrewer")    # color palettes
library("ggrepel")         # repel overlapping text labels in ggplot2 plots
library("clusterProfiler") # for enrichment analysis

## 
## clusterProfiler v4.14.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## Guangchuang Yu, Li-Gen Wang, Yanyan Han and Qing-Yu He.
## clusterProfiler: an R package for comparing biological themes among
## gene clusters. OMICS: A Journal of Integrative Biology. 2012,
## 16(5):284-287
## 
## Attaching package: 'clusterProfiler'
## 
## The following object is masked from 'package:IRanges':
## 
##     slice
## 
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## 
## The following object is masked from 'package:purrr':
## 
##     simplify
## 
## The following object is masked from 'package:stats':
## 
##     filter

library("enrichplot")      # to draw functional enrichment results

## enrichplot v1.26.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## Qianwen Wang, Ming Li, Tianzhi Wu, Li Zhan, Lin Li, Meijun Chen, Wenqin
## Xie, Zijing Xie, Erqiang Hu, Shuangbin Xu, Guangchuang Yu. Exploring
## epigenomic datasets by ChIPseeker. Current Protocols. 2022, 2(10): e585

library("org.Mm.eg.db")    # mouse gene annotation database

## Loading required package: AnnotationDbi
## 
## Attaching package: 'AnnotationDbi'
## 
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## 
## The following object is masked from 'package:dplyr':
## 
##     select

library("patchwork")         # combining multiple plots
library("ComplexHeatmap")  # to draw heatmaps

## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.22.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). Most of the arguments
##    in the original pheatmap() are identically supported in the new function. You 
##    can still use the original function by explicitly calling pheatmap::pheatmap().
## 
## 
## Attaching package: 'ComplexHeatmap'
## 
## The following object is masked from 'package:pheatmap':
## 
##     pheatmap

# Install and load package containing the data
library(OSD758)

## 
## Attaching package: 'OSD758'
## 
## The following object is masked from 'package:Biobase':
## 
##     samples

# Gene expression in Counts
raw_counts <- OSD758::gene_expression(format = "wide", only_expressed_genes = TRUE) 
# View(raw_counts)

# Samples metadata
samples <- OSD758::samples()
# View(samples)

# Create a list to save the QC results
qc <- list()

# You can choose between vst() and rlog() - this tutorial uses vst.
qc$vst <- DESeq2::vst(raw_counts, blind = TRUE)

# Run PCA
qc$pca_vst <- prcomp(t(qc$vst)) 

# Extract the components
qc$components <- qc$pca_vst[["x"]]
qc$components <- tibble::as_tibble(qc$components, rownames = "sample_id")

# Add sample annotations to components for plot coloring
qc$components_annot <-
  dplyr::left_join(qc$components, as.data.frame(samples[, c(1,5,6,8)]), by = "sample_id") |>
  dplyr::relocate(spacecraft, acceleration_source, gravity_class, .after = sample_id)

# Calculate the % variance per component
qc$pca_percent_var <- round(qc$pca_vst$sdev^2/sum(qc$pca_vst$sdev^2)*100)

#
# 2D PCA | Using ggplot2
#

# Color by gravity_class
qc$pca_gravity <-
  ggplot(qc$components_annot, aes(x = PC1, y = PC2, color = gravity_class)) +
  geom_point(size = 1) +
  labs(
    title = "PCA gene expression | Colored by gravity_class",
    x = paste0("PC1 (", qc$pca_percent_var[1], "% variance)"),
    y = paste0("PC2 (", qc$pca_percent_var[2], "% variance)")
  ) +
  theme_minimal()

# Color by accelaration_source
qc$pca_acceleration <-
  ggplot(qc$components_annot, aes(x = PC1, y = PC2, color = acceleration_source)) +
  geom_point(size = 1) +
  labs(
    title = "PCA gene expression | Colored by acceleration_source",
    x = paste0("PC1 (", qc$pca_percent_var[1], "% variance)"),
    y = paste0("PC2 (", qc$pca_percent_var[2], "% variance)")
  ) +
  theme_minimal()

# Color by gravity_class and shape by acceleration_source
qc$pca_gravity_acceleration <-
  ggplot(qc$components_annot, aes(x = PC1, y = PC2, 
                               color = gravity_class,
                               shape = acceleration_source)) +
  geom_point(size = 1) +
  labs(
    title = "PCA gene expression | Colored by gravity_class | Shape acceleration_source",
    x = paste0("PC1 (", qc$pca_percent_var[1], "% variance)"),
    y = paste0("PC2 (", qc$pca_percent_var[2], "% variance)")
  ) +
  theme_minimal()


# Assemble pca plots
qc$pca_gravity_acceleration /
(qc$pca_gravity | qc$pca_acceleration)

# Plot sample to sample distance for hierarchical clustering

# Calculate Euclidean distances between samples (rows) by transposing the matrix with t().
qc$sample_dist_matrix <- as.matrix(dist(t(qc$vst), method = "euclidean"))


# Define a color palette for the heatmap
qc$colors <- colorRampPalette(rev(brewer.pal(9, "Greens")))(255) # function from RColorBrewer package

# Create the heatmap
qc$dist_clustering <- pheatmap::pheatmap(
  qc$sample_dist_matrix,
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  col = qc$colors,
  fontsize_col = 8,
  fontsize_row = 5
)

### Compute pairwise correlation values
qc$sample_corr <- cor(qc$vst)

### Plot heatmap using the correlation matrix
qc$corr_clustering <-
  pheatmap::pheatmap(
    qc$sample_corr,
    cluster_rows = TRUE,
    cluster_cols = TRUE,
    fontsize_row = 5,
    fontsize_col = 8
  )

# Create list to save the analysis objects
de_deseq <- list()

# Check that sample ids match between raw_counts and samples 
# Ensure same content
stopifnot(setequal(colnames(raw_counts), samples$sample_id))

# Reorder columns to match sample order
raw_counts <- raw_counts[, samples$sample_id]

# Make sure the factor levels are ordered so that the desired baseline comes first.
# DESeq2 uses the first level from factors as the baseline.
samples <-
  samples |>
  dplyr::mutate(gravity_class = factor(
    gravity_class,
    levels = c("1.00_G", "0.33_G", "0.66_G", "micro_G")
  ))


# DE Step 1: Create a DESeqDataSet object (dds)
de_deseq$dds <- DESeq2::DESeqDataSetFromMatrix(
  countData = raw_counts,
  colData = samples,
  design = ~ gravity_class
)

# DE Step 2: Run the DESeq function to perform the analysis
de_deseq$dds <- DESeq(de_deseq$dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

## -- replacing outliers and refitting for 452 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)

## estimating dispersions

## fitting model and testing

# Check the design formula
DESeq2::design(de_deseq$dds)

## ~gravity_class

# Check the sample info
SummarizedExperiment::colData(de_deseq$dds)

## DataFrame with 35 rows and 12 columns
##                               sample_id     organism      sex       matrix
##                             <character>  <character> <factor>  <character>
## MHU-8_RTN_FLT_FLT02 MHU-8_RTN_FLT_FLT02 Mus musculus     male Right retina
## MHU-8_RTN_FLT_FLT03 MHU-8_RTN_FLT_FLT03 Mus musculus     male Right retina
## MHU-8_RTN_FLT_FLT04 MHU-8_RTN_FLT_FLT04 Mus musculus     male Right retina
## MHU-8_RTN_FLT_FLT05 MHU-8_RTN_FLT_FLT05 Mus musculus     male Right retina
## MHU-8_RTN_FLT_FLT06 MHU-8_RTN_FLT_FLT06 Mus musculus     male Right retina
## ...                                 ...          ...      ...          ...
## MHU-8_RTN_HGC_HGC08 MHU-8_RTN_HGC_HGC08 Mus musculus     male Right retina
## MHU-8_RTN_HGC_HGC09 MHU-8_RTN_HGC_HGC09 Mus musculus     male Right retina
## MHU-8_RTN_HGC_HGC10 MHU-8_RTN_HGC_HGC10 Mus musculus     male Right retina
## MHU-8_RTN_HGC_HGC11 MHU-8_RTN_HGC_HGC11 Mus musculus     male Right retina
## MHU-8_RTN_HGC_HGC12 MHU-8_RTN_HGC_HGC12 Mus musculus     male Right retina
##                     spacecraft      acceleration_source   gravity gravity_class
##                       <factor>                 <factor> <numeric>      <factor>
## MHU-8_RTN_FLT_FLT02        ISS Centrifugal Acceleration   3.3e-01       0.33_G 
## MHU-8_RTN_FLT_FLT03        ISS Centrifugal Acceleration   6.6e-01       0.66_G 
## MHU-8_RTN_FLT_FLT04        ISS Centrifugal Acceleration   1.0e+00       1.00_G 
## MHU-8_RTN_FLT_FLT05        ISS Orbital Free Fall          1.0e-06       micro_G
## MHU-8_RTN_FLT_FLT06        ISS Centrifugal Acceleration   3.3e-01       0.33_G 
## ...                        ...                      ...       ...           ...
## MHU-8_RTN_HGC_HGC08      Earth      Terrestrial Gravity         1        1.00_G
## MHU-8_RTN_HGC_HGC09      Earth      Terrestrial Gravity         1        1.00_G
## MHU-8_RTN_HGC_HGC10      Earth      Terrestrial Gravity         1        1.00_G
## MHU-8_RTN_HGC_HGC11      Earth      Terrestrial Gravity         1        1.00_G
## MHU-8_RTN_HGC_HGC12      Earth      Terrestrial Gravity         1        1.00_G
##                     weight_at_launch weight_at_euthanasia sizeFactor
##                            <numeric>            <numeric>  <numeric>
## MHU-8_RTN_FLT_FLT02             26.1                27.59   1.143044
## MHU-8_RTN_FLT_FLT03             26.0                24.35   1.255924
## MHU-8_RTN_FLT_FLT04             27.6                26.88   0.968085
## MHU-8_RTN_FLT_FLT05             27.3                28.15   0.731508
## MHU-8_RTN_FLT_FLT06             26.2                24.45   0.906360
## ...                              ...                  ...        ...
## MHU-8_RTN_HGC_HGC08             28.0                28.00   1.189854
## MHU-8_RTN_HGC_HGC09             25.8                26.36   0.693287
## MHU-8_RTN_HGC_HGC10             25.9                26.05   1.206786
## MHU-8_RTN_HGC_HGC11             26.6                26.71   1.129952
## MHU-8_RTN_HGC_HGC12             27.3                26.60   0.832887
##                     replaceable
##                       <logical>
## MHU-8_RTN_FLT_FLT02       FALSE
## MHU-8_RTN_FLT_FLT03       FALSE
## MHU-8_RTN_FLT_FLT04        TRUE
## MHU-8_RTN_FLT_FLT05       FALSE
## MHU-8_RTN_FLT_FLT06       FALSE
## ...                         ...
## MHU-8_RTN_HGC_HGC08        TRUE
## MHU-8_RTN_HGC_HGC09        TRUE
## MHU-8_RTN_HGC_HGC10        TRUE
## MHU-8_RTN_HGC_HGC11        TRUE
## MHU-8_RTN_HGC_HGC12        TRUE

# Display the first rows of the raw counts
head(DESeq2::counts(de_deseq$dds))

##                    MHU-8_RTN_FLT_FLT02 MHU-8_RTN_FLT_FLT03 MHU-8_RTN_FLT_FLT04
## ENSMUSG00000000001                2132                2501                1979
## ENSMUSG00000000028                 109                 125                 142
## ENSMUSG00000000031                  29                  27                  28
## ENSMUSG00000000037                  92                 129                  90
## ENSMUSG00000000049                  22                  22                   9
## ENSMUSG00000000056                3614                5008                3265
##                    MHU-8_RTN_FLT_FLT05 MHU-8_RTN_FLT_FLT06 MHU-8_RTN_FLT_FLT07
## ENSMUSG00000000001                1065                2771                1437
## ENSMUSG00000000028                  60                  76                  99
## ENSMUSG00000000031                  10                  56                  24
## ENSMUSG00000000037                  60                 128                  50
## ENSMUSG00000000049                  11                   4                   6
## ENSMUSG00000000056                1680                2196                2152
##                    MHU-8_RTN_FLT_FLT08 MHU-8_RTN_FLT_FLT09 MHU-8_RTN_FLT_FLT10
## ENSMUSG00000000001                2124                2749                1847
## ENSMUSG00000000028                 149                 111                  93
## ENSMUSG00000000031                  21                  14                  28
## ENSMUSG00000000037                  80                  79                  78
## ENSMUSG00000000049                   9                   6                  16
## ENSMUSG00000000056                3421                2412                2554
##                    MHU-8_RTN_FLT_FLT11 MHU-8_RTN_FLT_FLT12 MHU-8_RTN_FLT_FLT13
## ENSMUSG00000000001                1212                2413                1564
## ENSMUSG00000000028                 121                 254                 102
## ENSMUSG00000000031                  11                  26                  28
## ENSMUSG00000000037                  55                  93                  61
## ENSMUSG00000000049                  14                  16                  13
## ENSMUSG00000000056                2086                4128                2088
##                    MHU-8_RTN_FLT_FLT14 MHU-8_RTN_FLT_FLT15 MHU-8_RTN_FLT_FLT16
## ENSMUSG00000000001                2854                2259                1348
## ENSMUSG00000000028                 226                 182                 129
## ENSMUSG00000000031                  23                  17                  17
## ENSMUSG00000000037                  65                  64                  61
## ENSMUSG00000000049                  17                  17                   4
## ENSMUSG00000000056                5112                4517                2535
##                    MHU-8_RTN_FLT_FLT17 MHU-8_RTN_FLT_FLT18 MHU-8_RTN_FLT_FLT19
## ENSMUSG00000000001                1761                1915                1633
## ENSMUSG00000000028                 133                 186                 132
## ENSMUSG00000000031                 517                   8                  12
## ENSMUSG00000000037                  79                  59                  52
## ENSMUSG00000000049                   6                   8                  12
## ENSMUSG00000000056                3209                3496                3246
##                    MHU-8_RTN_FLT_FLT20 MHU-8_RTN_FLT_FLT21 MHU-8_RTN_FLT_FLT22
## ENSMUSG00000000001                1674                1823                2155
## ENSMUSG00000000028                 194                 207                 173
## ENSMUSG00000000031                  55                  13                  17
## ENSMUSG00000000037                  58                  71                  62
## ENSMUSG00000000049                  19                  13                  20
## ENSMUSG00000000056                3558                4088                3520
##                    MHU-8_RTN_FLT_FLT23 MHU-8_RTN_FLT_FLT24 MHU-8_RTN_HGC_HGC01
## ENSMUSG00000000001                1746                1472                1853
## ENSMUSG00000000028                 188                 132                 137
## ENSMUSG00000000031                  20                6298                  33
## ENSMUSG00000000037                  59                  79                  49
## ENSMUSG00000000049                  15                   4                   3
## ENSMUSG00000000056                3726                2370                3093
##                    MHU-8_RTN_HGC_HGC02 MHU-8_RTN_HGC_HGC03 MHU-8_RTN_HGC_HGC04
## ENSMUSG00000000001                2207                2056                1465
## ENSMUSG00000000028                 173                 186                 132
## ENSMUSG00000000031                  59                  36                   9
## ENSMUSG00000000037                  76                  70                  54
## ENSMUSG00000000049                   7                   8                   1
## ENSMUSG00000000056                3697                3431                2485
##                    MHU-8_RTN_HGC_HGC05 MHU-8_RTN_HGC_HGC06 MHU-8_RTN_HGC_HGC07
## ENSMUSG00000000001                2140                1824                1614
## ENSMUSG00000000028                 207                 171                 182
## ENSMUSG00000000031                  17                  41                  19
## ENSMUSG00000000037                  72                  61                  52
## ENSMUSG00000000049                  11                   7                   7
## ENSMUSG00000000056                3788                3489                2806
##                    MHU-8_RTN_HGC_HGC08 MHU-8_RTN_HGC_HGC09 MHU-8_RTN_HGC_HGC10
## ENSMUSG00000000001                1891                1153                2424
## ENSMUSG00000000028                 199                 132                 267
## ENSMUSG00000000031                  68                  11                  32
## ENSMUSG00000000037                  67                  55                  73
## ENSMUSG00000000049                   9                   6                   4
## ENSMUSG00000000056                3809                2176                4045
##                    MHU-8_RTN_HGC_HGC11 MHU-8_RTN_HGC_HGC12
## ENSMUSG00000000001                2119                1218
## ENSMUSG00000000028                 228                 156
## ENSMUSG00000000031                  35                  25
## ENSMUSG00000000037                  54                  55
## ENSMUSG00000000049                   4                   1
## ENSMUSG00000000056                3652                2793

# Display the first rows of the normalised counts to compare with raw counts 
head(DESeq2::counts(de_deseq$dds, normalized = TRUE))

##                    MHU-8_RTN_FLT_FLT02 MHU-8_RTN_FLT_FLT03 MHU-8_RTN_FLT_FLT04
## ENSMUSG00000000001          1865.19583          1991.36188         2044.242473
## ENSMUSG00000000028            95.35945            99.52828          146.681370
## ENSMUSG00000000031            25.37086            21.49811           28.923087
## ENSMUSG00000000037            80.48687           102.71319           92.967065
## ENSMUSG00000000049            19.24686            17.51698            9.296707
## ENSMUSG00000000056          3161.73439          3987.50112         3372.638542
##                    MHU-8_RTN_FLT_FLT05 MHU-8_RTN_FLT_FLT06 MHU-8_RTN_FLT_FLT07
## ENSMUSG00000000001          1455.89645         3057.282789         1867.260660
## ENSMUSG00000000028            82.02234           83.851856          128.642175
## ENSMUSG00000000031            13.67039           61.785578           31.185982
## ENSMUSG00000000037            82.02234          141.224178           64.970795
## ENSMUSG00000000049            15.03743            4.413256            7.796495
## ENSMUSG00000000056          2296.62539         2422.877303         2796.343034
##                    MHU-8_RTN_FLT_FLT08 MHU-8_RTN_FLT_FLT09 MHU-8_RTN_FLT_FLT10
## ENSMUSG00000000001         2047.416339         2836.616800          1843.37626
## ENSMUSG00000000028          143.627606          114.537819            92.81754
## ENSMUSG00000000031           20.242817           14.446211            27.94507
## ENSMUSG00000000037           77.115493           81.517907            77.84697
## ENSMUSG00000000049            8.675493            6.191233            15.96861
## ENSMUSG00000000056         3297.651269         2488.875854          2548.98915
##                    MHU-8_RTN_FLT_FLT11 MHU-8_RTN_FLT_FLT12 MHU-8_RTN_FLT_FLT13
## ENSMUSG00000000001          1587.30895          1907.57546          1821.02624
## ENSMUSG00000000028           158.46896           200.79742           118.76258
## ENSMUSG00000000031            14.40627            20.55407            32.60149
## ENSMUSG00000000037            72.03135            73.52031            71.02468
## ENSMUSG00000000049            18.33525            12.64866            15.13641
## ENSMUSG00000000056          2731.95254          3263.35329          2431.13989
##                    MHU-8_RTN_FLT_FLT14 MHU-8_RTN_FLT_FLT15 MHU-8_RTN_FLT_FLT16
## ENSMUSG00000000001          2073.55587          1826.65007         1661.578318
## ENSMUSG00000000028           164.19889           147.16703          159.008608
## ENSMUSG00000000031            16.71051            13.74637           20.954623
## ENSMUSG00000000037            47.22534            51.75104           75.190117
## ENSMUSG00000000049            12.35124            13.74637            4.930499
## ENSMUSG00000000056          3714.09166          3652.49154         3124.704032
##                    MHU-8_RTN_FLT_FLT17 MHU-8_RTN_FLT_FLT18 MHU-8_RTN_FLT_FLT19
## ENSMUSG00000000001         1643.168889         1822.014600          1673.31612
## ENSMUSG00000000028          124.100774          176.968520           135.25887
## ENSMUSG00000000031          482.406767            7.611549            12.29626
## ENSMUSG00000000037           73.713993           56.135176            53.28380
## ENSMUSG00000000049            5.598531            7.611549            12.29626
## ENSMUSG00000000056         2994.281071         3326.247020          3326.13847
##                    MHU-8_RTN_FLT_FLT20 MHU-8_RTN_FLT_FLT21 MHU-8_RTN_FLT_FLT22
## ENSMUSG00000000001          1468.88472          1619.73692          1961.28814
## ENSMUSG00000000028           170.22917           183.91966           157.44912
## ENSMUSG00000000031            48.26085            11.55051            15.47188
## ENSMUSG00000000037            50.89326            63.08356            56.42685
## ENSMUSG00000000049            16.67193            11.55051            18.20221
## ENSMUSG00000000056          3122.03814          3632.19119          3203.58898
##                    MHU-8_RTN_FLT_FLT23 MHU-8_RTN_FLT_FLT24 MHU-8_RTN_HGC_HGC01
## ENSMUSG00000000001          1698.43313         1696.297093         1863.749414
## ENSMUSG00000000028           182.87825          152.113598          137.794749
## ENSMUSG00000000031            19.45513         7257.662425           33.191436
## ENSMUSG00000000037            57.39264           91.037684           49.284253
## ENSMUSG00000000049            14.59135            4.609503            3.017403
## ENSMUSG00000000056          3624.49133         2731.130509         3110.942762
##                    MHU-8_RTN_HGC_HGC02 MHU-8_RTN_HGC_HGC03 MHU-8_RTN_HGC_HGC04
## ENSMUSG00000000001         1744.260471         1800.374182         1759.763969
## ENSMUSG00000000028          136.727259          162.874318          158.558938
## ENSMUSG00000000031           46.629528           31.524062           10.810837
## ENSMUSG00000000037           60.065154           61.296786           64.865020
## ENSMUSG00000000049            5.532317            7.005347            1.201204
## ENSMUSG00000000056         2921.853630         3004.418200         2984.992125
##                    MHU-8_RTN_HGC_HGC05 MHU-8_RTN_HGC_HGC06 MHU-8_RTN_HGC_HGC07
## ENSMUSG00000000001         1829.895178         1783.974880         1831.460294
## ENSMUSG00000000028          177.003879          167.247645          206.521545
## ENSMUSG00000000031           14.536550           40.100313           21.559942
## ENSMUSG00000000037           61.566567           59.661441           59.006156
## ENSMUSG00000000049            9.406003            6.846395            7.943136
## ENSMUSG00000000056         3239.085483         3412.438792         3184.062941
##                    MHU-8_RTN_HGC_HGC08 MHU-8_RTN_HGC_HGC09 MHU-8_RTN_HGC_HGC10
## ENSMUSG00000000001         1589.271083          1663.09100          2008.64181
## ENSMUSG00000000028          167.247459           190.39724           221.24891
## ENSMUSG00000000031           57.149886            15.86644            26.51672
## ENSMUSG00000000037           56.309446            79.33218            60.49128
## ENSMUSG00000000049            7.563955             8.65442             3.31459
## ENSMUSG00000000056         3201.234032          3138.66958          3351.87960
##                    MHU-8_RTN_HGC_HGC11 MHU-8_RTN_HGC_HGC12
## ENSMUSG00000000001         1875.300777         1462.382526
## ENSMUSG00000000028          201.778470          187.300225
## ENSMUSG00000000031           30.974765           30.016062
## ENSMUSG00000000037           47.789638           66.035336
## ENSMUSG00000000049            3.539973            1.200642
## ENSMUSG00000000056         3231.995487         3353.394413

# Convert the normalised counts from the DESeq2 object to a tibble
normalised_counts <- tibble::as_tibble(DESeq2::counts(de_deseq$dds, normalized = TRUE),
                                       rownames = "ensembl_gen_id")
head(normalised_counts)

## # A tibble: 6 × 36
##   ensembl_gen_id     `MHU-8_RTN_FLT_FLT02` `MHU-8_RTN_FLT_FLT03`
##   <chr>                              <dbl>                 <dbl>
## 1 ENSMUSG00000000001                1865.                 1991. 
## 2 ENSMUSG00000000028                  95.4                  99.5
## 3 ENSMUSG00000000031                  25.4                  21.5
## 4 ENSMUSG00000000037                  80.5                 103. 
## 5 ENSMUSG00000000049                  19.2                  17.5
## 6 ENSMUSG00000000056                3162.                 3988. 
## # ℹ 33 more variables: `MHU-8_RTN_FLT_FLT04` <dbl>,
## #   `MHU-8_RTN_FLT_FLT05` <dbl>, `MHU-8_RTN_FLT_FLT06` <dbl>,
## #   `MHU-8_RTN_FLT_FLT07` <dbl>, `MHU-8_RTN_FLT_FLT08` <dbl>,
## #   `MHU-8_RTN_FLT_FLT09` <dbl>, `MHU-8_RTN_FLT_FLT10` <dbl>,
## #   `MHU-8_RTN_FLT_FLT11` <dbl>, `MHU-8_RTN_FLT_FLT12` <dbl>,
## #   `MHU-8_RTN_FLT_FLT13` <dbl>, `MHU-8_RTN_FLT_FLT14` <dbl>,
## #   `MHU-8_RTN_FLT_FLT15` <dbl>, `MHU-8_RTN_FLT_FLT16` <dbl>, …

# Find the names of the estimated effects (coefficients) of the model
DESeq2::resultsNames(de_deseq$dds)

## [1] "Intercept"                       "gravity_class_0.33_G_vs_1.00_G" 
## [3] "gravity_class_0.66_G_vs_1.00_G"  "gravity_class_micro_G_vs_1.00_G"

# Extract DE results for each gravity condition vs 1.00 G
    # The results function by default applies the Benjamini-Hochberg method to control FDR
de_deseq$res_033_vs_1G <- DESeq2::results(de_deseq$dds, name = "gravity_class_0.33_G_vs_1.00_G")
de_deseq$res_066_vs_1G <- DESeq2::results(de_deseq$dds, name = "gravity_class_0.66_G_vs_1.00_G")
de_deseq$res_micro_vs_1G <- DESeq2::results(de_deseq$dds, name = "gravity_class_micro_G_vs_1.00_G")


# Summarise the results:
  # Shows the number of tested genes, the number up- and down-regulated (at alpha),
  # and how many were excluded by multiple testing due to low counts.
DESeq2::summary(de_deseq$res_033_vs_1G)

## 
## out of 37806 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 627, 1.7%
## LFC < 0 (down)     : 415, 1.1%
## outliers [1]       : 292, 0.77%
## low counts [2]     : 13163, 35%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

DESeq2::summary(de_deseq$res_066_vs_1G)

## 
## out of 37806 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 133, 0.35%
## LFC < 0 (down)     : 103, 0.27%
## outliers [1]       : 292, 0.77%
## low counts [2]     : 10264, 27%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

DESeq2::summary(de_deseq$res_micro_vs_1G)

## 
## out of 37806 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 599, 1.6%
## LFC < 0 (down)     : 328, 0.87%
## outliers [1]       : 292, 0.77%
## low counts [2]     : 10264, 27%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

# Extract significant results (padj < 0.05) and convert to tibble
de_deseq$sig_033_vs_1G <-
  de_deseq$res_033_vs_1G |>
  tibble::as_tibble(rownames = "ensembl_gen_id") |>
  dplyr::filter(!is.na(padj), padj < 0.05) |>
  dplyr::arrange(padj, log2FoldChange)

de_deseq$sig_066_vs_1G <-
  de_deseq$res_066_vs_1G |>
  tibble::as_tibble(rownames = "ensembl_gen_id") |>
  dplyr::filter(!is.na(padj), padj < 0.05) |>
  dplyr::arrange(padj, log2FoldChange)

de_deseq$sig_micro_vs_1G <-
  de_deseq$res_micro_vs_1G |>
  tibble::as_tibble(rownames = "ensembl_gen_id") |>
  dplyr::filter(!is.na(padj), padj < 0.05) |>
  dplyr::arrange(padj, log2FoldChange)

# Look at the top results
head(de_deseq$sig_033_vs_1G)

## # A tibble: 6 × 7
##   ensembl_gen_id     baseMean log2FoldChange lfcSE  stat   pvalue     padj
##   <chr>                 <dbl>          <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1 ENSMUSG00000031167   4644.           1.93  0.186 10.4  2.52e-25 6.13e-21
## 2 ENSMUSG00000045193  11624.           1.40  0.143  9.77 1.48e-22 1.80e-18
## 3 ENSMUSG00000035711    129.          -1.40  0.184 -7.59 3.11e-14 2.53e-10
## 4 ENSMUSG00000032611     89.1         -0.849 0.118 -7.17 7.57e-13 4.61e- 9
## 5 ENSMUSG00000072969    708.           0.740 0.106  6.97 3.17e-12 1.54e- 8
## 6 ENSMUSG00000081058   1303.          -1.71  0.247 -6.94 3.97e-12 1.61e- 8

head(de_deseq$sig_066_vs_1G)

## # A tibble: 6 × 7
##   ensembl_gen_id     baseMean log2FoldChange lfcSE  stat   pvalue     padj
##   <chr>                 <dbl>          <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1 ENSMUSG00000031167   4644.            1.44 0.186  7.73 1.10e-14 2.99e-10
## 2 ENSMUSG00000045193  11624.            1.03 0.143  7.22 5.19e-13 7.07e- 9
## 3 ENSMUSG00000083405     18.1         -22.1  3.24  -6.83 8.48e-12 6.24e- 8
## 4 ENSMUSG00000067996    313.          -20.4  2.98  -6.82 9.16e-12 6.24e- 8
## 5 ENSMUSG00000070392   5076.           -1.05 0.174 -6.03 1.68e- 9 9.17e- 6
## 6 ENSMUSG00000066170    153.            2.04 0.357  5.72 1.09e- 8 4.97e- 5

head(de_deseq$sig_micro_vs_1G)

## # A tibble: 6 × 7
##   ensembl_gen_id     baseMean log2FoldChange lfcSE  stat   pvalue     padj
##   <chr>                 <dbl>          <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1 ENSMUSG00000031167   4644.           1.71  0.200  8.57 1.08e-17 2.94e-13
## 2 ENSMUSG00000045193  11624.           1.28  0.153  8.36 6.48e-17 8.83e-13
## 3 ENSMUSG00000020288    651.          -0.889 0.116 -7.67 1.73e-14 1.57e-10
## 4 ENSMUSG00000085511     20.0          2.37  0.365  6.51 7.74e-11 3.52e- 7
## 5 ENSMUSG00000066170    153.           2.49  0.382  6.54 6.33e-11 3.52e- 7
## 6 ENSMUSG00000038763     63.7          2.69  0.412  6.53 6.69e-11 3.52e- 7

# List to save all the visualization plots
de_plots <- list()

# Extract only gene ids from the significant results
sig_gene_ids <- de_deseq$sig_micro_vs_1G$ensembl_gen_id

# Map between ENSEMBL gene ids and gene symbol
ensembl2symbol <- OSD758::gene_expression("long") |>
  dplyr::select(ensembl_gen_id, gene_symbol) |>
  dplyr::distinct()
  
# Get normalised counts for significant genes 
sig_normalised_counts <- normalised_counts |>
  dplyr::filter(ensembl_gen_id %in% sig_gene_ids) |>
  dplyr::left_join(ensembl2symbol, by = "ensembl_gen_id") |>
  dplyr::select(-ensembl_gen_id) |>
  tibble::column_to_rownames("gene_symbol") |>
  as.matrix()


# Scale each row: subtract mean and divide by SD.
# The 2 transpositions are required because, by default, scale applies to the columns.
sig_normalised_counts_scaled <- t(scale(t(sig_normalised_counts)))   # scale rows, not columns

# Find min and max values to get meaningful colors in heatmaps
range(sig_normalised_counts_scaled)

## [1] -3.324583  5.724152

# Complex heatmap
de_plots$ht <- ComplexHeatmap::Heatmap(sig_normalised_counts_scaled[1:200, ],
                        name = "Exprs (z-score)",
                        column_title = "Microgravity vs Earth's Gravity (1G) | Top 200 DE genes",
                        cluster_columns = TRUE,
                        cluster_rows = TRUE,
                        # number of clusters in K-means to split rows
                        row_km = 2,
                        # add cluster names
                        row_title = c("A", "B"),
                        row_title_rot = 90,
                        row_gap = unit(2, "mm"),
                        # number of clusters in K-means to split columns
                        column_km = 3,
                        column_gap = unit(2, "mm"),
                        border = "grey",
                        na_col = "white",
                        # Color range (min and max values from sig_normalised_counts_scaled)
                        col = circlize::colorRamp2(c(-4, 0, 6), c("skyblue3", "white", "forestgreen")),
                        column_names_gp = grid::gpar(fontsize = 9),
                        row_names_gp = grid::gpar(fontsize = 5),
                        rect_gp = grid::gpar(col = "grey", lwd = 0.5))

# Print the plot
ComplexHeatmap::draw(de_plots$ht, heatmap_legend_side = "right")

# Add a column with differential expression status and add gene symbol to the results
sig_res_annot <- 
  de_deseq$sig_micro_vs_1G |>
  dplyr::mutate(diffexpressed = case_when(
    log2FoldChange > 1 & padj < 0.05 ~ 'upregulated',
    log2FoldChange < -1 & padj < 0.05 ~ 'downregulated',
    TRUE ~ 'not_de')) |>
  dplyr::left_join(ensembl2symbol, by = "ensembl_gen_id") |>
  dplyr::select(-ensembl_gen_id) |>
  # add gene symbols
  dplyr::relocate(gene_symbol, .before =  1L) |>
  dplyr::arrange(padj, log2FoldChange)


# Create a volcano plot using ggplot2
de_plots$volcano_plot <-
  ggplot(data = sig_res_annot, aes(
    x = log2FoldChange,
    y = -log10(padj),
    col = diffexpressed))+
  geom_point(size = 0.6) +
  geom_text_repel(data = filter(sig_res_annot, 
                                ((abs(log2FoldChange) > log2(8)) & (padj < -log10(0.05)))), 
                  aes(label = gene_symbol), size = 1.5, max.overlaps = Inf) +
  ggtitle("DE genes micro gravity versus Earth's gravity") +
  geom_vline(xintercept = c(-1, 1), col = "black", linetype = 'dashed', linewidth = 0.2) +
  geom_hline(yintercept = -log10(0.05), col = "black", linetype = 'dashed', linewidth = 0.2) +
  theme(plot.title = element_text(size = rel(1), hjust = 0.5),
        axis.title = element_text(size = rel(1))) +
  scale_color_manual(values = c("upregulated" = "red",
                                "downregulated" = "blue",
                                "not_de" = "grey")) +
  labs(color = 'DE genes') +
  xlim(-5, 5) +   # Caution: This hides some genes
  ylim(0, 7.5) +  # Caution: This hides some genes
  theme_light()

# Print the volcano plot
de_plots$volcano_plot

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

# Enrichment analysis (ORA)

# Create a list to save the enrichment analysis results
fun_enrich <- list()

# Prepare list of significant DE genes in descending Log2FoldChange
fun_enrich$de_genes_fc <-
  de_deseq$sig_micro_vs_1G |>
  dplyr::select(ensembl_gen_id, log2FoldChange) |>
  dplyr::arrange(dplyr::desc(log2FoldChange))

# Run GO enrichment analysis using the enrichGO function
fun_enrich$ego <- clusterProfiler::enrichGO(
  gene = fun_enrich$de_genes_fc$ensembl_gen_id, # Genes of interest
  universe = ensembl2symbol$ensembl_gen_id,     # Background gene set
  OrgDb = org.Mm.eg.db,                         # Annotation database
  keyType = 'ENSEMBL',                          # Key type for gene identifiers
  readable = TRUE,                              # Convert gene IDs to gene names
  ont = "BP",                                   # Ontology: can be "BP", "MF", "CC", or "ALL"
  pvalueCutoff = 0.05,                          # P-value cutoff for significance
  qvalueCutoff = 0.10                           # Q-value cutoff for significance
)


# Visualize the enriched GO terms
fun_enrich$dotplot <- 
  enrichplot::dotplot(fun_enrich$ego, showCategory = 20, title = "GO BP | Enrichment barplot")

fun_enrich$heatplot <- 
  enrichplot::heatplot(fun_enrich$ego, showCategory = 10, 
                       foldChange = fun_enrich$de_genes_fc$log2FoldChange) +
  ggplot2::ggtitle("GO BP | Enrichment heatplot")

fun_enrich$emapplot <- 
  enrichplot::emapplot(pairwise_termsim(fun_enrich$ego), showCategory = 15, layout = "nicely")

fun_enrich$cnetplot <- 
  enrichplot::cnetplot(fun_enrich$ego, categorySize = "pvalue", showCategory = 5, 
                                 layout = "nicely", foldChange = fun_enrich$de_genes_fc$log2FoldChange)

fun_enrich$treeplot <- 
  enrichplot::treeplot(enrichplot::pairwise_termsim(fun_enrich$ego), 
                       showCategory = 20, nCluster=5, offset = rel(2)) + 
  ggplot2::ggtitle("GO BP | Enrichment treeplot") + 
  ggplot2::theme(text = element_text(size = 8))

## Warning in treeplot.enrichResult(x, ...): Use 'offset.params = list(bar_tree = your_value)' instead of 'offset'.
##  The offset parameter will be removed in the next version.

## Warning in treeplot.enrichResult(x, ...): Use 'cluster.params = list(n = your_value)' instead of 'nCluster'.
##  The nCluster parameter will be removed in the next version.

# Combine the enrichment plots into panels from a single figure
(fun_enrich$dotplot) |
(fun_enrich$emapplot / fun_enrich$cnetplot)

## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Differential expression analysis with DESeq2

Michael Warren G. Ceballos

2025-06-04

Including Plots