# # Install the required packages if not already installed
# install.packages(c("pak"))
#
# pak::pkg_install(c("BiocManager", "remotes", "here", "tidyverse",
# "DESeq2", "pheatmap", "RColorBrewer", "ggrepel", "clusterProfiler",
# "enrichplot", "org.Mm.eg.db", "patchwork", "ComplexHeatmap"
# ))
#
# # Install the course data package
# pak::pak("patterninstitute/OSD758")
# Load packages
library("here") # package to find your current working directory
## here() starts at /home/students/student01/ rnaseq_counts2bio_course
library("tidyverse") # packages for data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("DESeq2") # differential expression analysis
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
##
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
##
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
## table, tapply, union, unique, unsplit, which.max, which.min
##
##
## Attaching package: 'S4Vectors'
##
## The following objects are masked from 'package:lubridate':
##
## second, second<-
##
## The following objects are masked from 'package:dplyr':
##
## first, rename
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## The following object is masked from 'package:utils':
##
## findMatches
##
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
##
## The following object is masked from 'package:lubridate':
##
## %within%
##
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## The following object is masked from 'package:purrr':
##
## reduce
##
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
##
## The following object is masked from 'package:dplyr':
##
## count
##
##
## Attaching package: 'MatrixGenerics'
##
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
##
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
##
## Attaching package: 'Biobase'
##
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
##
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library("pheatmap") # heatmaps
library("RColorBrewer") # color palettes
library("ggrepel") # repel overlapping text labels in ggplot2 plots
library("clusterProfiler") # for enrichment analysis
##
## clusterProfiler v4.14.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
##
## Please cite:
##
## Guangchuang Yu, Li-Gen Wang, Yanyan Han and Qing-Yu He.
## clusterProfiler: an R package for comparing biological themes among
## gene clusters. OMICS: A Journal of Integrative Biology. 2012,
## 16(5):284-287
##
## Attaching package: 'clusterProfiler'
##
## The following object is masked from 'package:IRanges':
##
## slice
##
## The following object is masked from 'package:S4Vectors':
##
## rename
##
## The following object is masked from 'package:purrr':
##
## simplify
##
## The following object is masked from 'package:stats':
##
## filter
library("enrichplot") # to draw functional enrichment results
## enrichplot v1.26.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
##
## Please cite:
##
## Qianwen Wang, Ming Li, Tianzhi Wu, Li Zhan, Lin Li, Meijun Chen, Wenqin
## Xie, Zijing Xie, Erqiang Hu, Shuangbin Xu, Guangchuang Yu. Exploring
## epigenomic datasets by ChIPseeker. Current Protocols. 2022, 2(10): e585
library("org.Mm.eg.db") # mouse gene annotation database
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
##
## The following object is masked from 'package:clusterProfiler':
##
## select
##
## The following object is masked from 'package:dplyr':
##
## select
library("patchwork") # combining multiple plots
library("ComplexHeatmap") # to draw heatmaps
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.22.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). Most of the arguments
## in the original pheatmap() are identically supported in the new function. You
## can still use the original function by explicitly calling pheatmap::pheatmap().
##
##
## Attaching package: 'ComplexHeatmap'
##
## The following object is masked from 'package:pheatmap':
##
## pheatmap
# Install and load package containing the data
library(OSD758)
##
## Attaching package: 'OSD758'
##
## The following object is masked from 'package:Biobase':
##
## samples
# Gene expression in Counts
raw_counts <- OSD758::gene_expression(format = "wide", only_expressed_genes = TRUE)
# View(raw_counts)
# Samples metadata
samples <- OSD758::samples()
# View(samples)
# Create a list to save the QC results
qc <- list()
# You can choose between vst() and rlog() - this tutorial uses vst.
qc$vst <- DESeq2::vst(raw_counts, blind = TRUE)
# Run PCA
qc$pca_vst <- prcomp(t(qc$vst))
# Extract the components
qc$components <- qc$pca_vst[["x"]]
qc$components <- tibble::as_tibble(qc$components, rownames = "sample_id")
# Add sample annotations to components for plot coloring
qc$components_annot <-
dplyr::left_join(qc$components, as.data.frame(samples[, c(1,5,6,8)]), by = "sample_id") |>
dplyr::relocate(spacecraft, acceleration_source, gravity_class, .after = sample_id)
# Calculate the % variance per component
qc$pca_percent_var <- round(qc$pca_vst$sdev^2/sum(qc$pca_vst$sdev^2)*100)
#
# 2D PCA | Using ggplot2
#
# Color by gravity_class
qc$pca_gravity <-
ggplot(qc$components_annot, aes(x = PC1, y = PC2, color = gravity_class)) +
geom_point(size = 1) +
labs(
title = "PCA gene expression | Colored by gravity_class",
x = paste0("PC1 (", qc$pca_percent_var[1], "% variance)"),
y = paste0("PC2 (", qc$pca_percent_var[2], "% variance)")
) +
theme_minimal()
# Color by accelaration_source
qc$pca_acceleration <-
ggplot(qc$components_annot, aes(x = PC1, y = PC2, color = acceleration_source)) +
geom_point(size = 1) +
labs(
title = "PCA gene expression | Colored by acceleration_source",
x = paste0("PC1 (", qc$pca_percent_var[1], "% variance)"),
y = paste0("PC2 (", qc$pca_percent_var[2], "% variance)")
) +
theme_minimal()
# Color by gravity_class and shape by acceleration_source
qc$pca_gravity_acceleration <-
ggplot(qc$components_annot, aes(x = PC1, y = PC2,
color = gravity_class,
shape = acceleration_source)) +
geom_point(size = 1) +
labs(
title = "PCA gene expression | Colored by gravity_class | Shape acceleration_source",
x = paste0("PC1 (", qc$pca_percent_var[1], "% variance)"),
y = paste0("PC2 (", qc$pca_percent_var[2], "% variance)")
) +
theme_minimal()
# Assemble pca plots
qc$pca_gravity_acceleration /
(qc$pca_gravity | qc$pca_acceleration)
# Plot sample to sample distance for hierarchical clustering
# Calculate Euclidean distances between samples (rows) by transposing the matrix with t().
qc$sample_dist_matrix <- as.matrix(dist(t(qc$vst), method = "euclidean"))
# Define a color palette for the heatmap
qc$colors <- colorRampPalette(rev(brewer.pal(9, "Greens")))(255) # function from RColorBrewer package
# Create the heatmap
qc$dist_clustering <- pheatmap::pheatmap(
qc$sample_dist_matrix,
cluster_rows = TRUE,
cluster_cols = TRUE,
col = qc$colors,
fontsize_col = 8,
fontsize_row = 5
)
### Compute pairwise correlation values
qc$sample_corr <- cor(qc$vst)
### Plot heatmap using the correlation matrix
qc$corr_clustering <-
pheatmap::pheatmap(
qc$sample_corr,
cluster_rows = TRUE,
cluster_cols = TRUE,
fontsize_row = 5,
fontsize_col = 8
)
# Create list to save the analysis objects
de_deseq <- list()
# Check that sample ids match between raw_counts and samples
# Ensure same content
stopifnot(setequal(colnames(raw_counts), samples$sample_id))
# Reorder columns to match sample order
raw_counts <- raw_counts[, samples$sample_id]
# Make sure the factor levels are ordered so that the desired baseline comes first.
# DESeq2 uses the first level from factors as the baseline.
samples <-
samples |>
dplyr::mutate(gravity_class = factor(
gravity_class,
levels = c("1.00_G", "0.33_G", "0.66_G", "micro_G")
))
# DE Step 1: Create a DESeqDataSet object (dds)
de_deseq$dds <- DESeq2::DESeqDataSetFromMatrix(
countData = raw_counts,
colData = samples,
design = ~ gravity_class
)
# DE Step 2: Run the DESeq function to perform the analysis
de_deseq$dds <- DESeq(de_deseq$dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 452 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# Check the design formula
DESeq2::design(de_deseq$dds)
## ~gravity_class
# Check the sample info
SummarizedExperiment::colData(de_deseq$dds)
## DataFrame with 35 rows and 12 columns
## sample_id organism sex matrix
## <character> <character> <factor> <character>
## MHU-8_RTN_FLT_FLT02 MHU-8_RTN_FLT_FLT02 Mus musculus male Right retina
## MHU-8_RTN_FLT_FLT03 MHU-8_RTN_FLT_FLT03 Mus musculus male Right retina
## MHU-8_RTN_FLT_FLT04 MHU-8_RTN_FLT_FLT04 Mus musculus male Right retina
## MHU-8_RTN_FLT_FLT05 MHU-8_RTN_FLT_FLT05 Mus musculus male Right retina
## MHU-8_RTN_FLT_FLT06 MHU-8_RTN_FLT_FLT06 Mus musculus male Right retina
## ... ... ... ... ...
## MHU-8_RTN_HGC_HGC08 MHU-8_RTN_HGC_HGC08 Mus musculus male Right retina
## MHU-8_RTN_HGC_HGC09 MHU-8_RTN_HGC_HGC09 Mus musculus male Right retina
## MHU-8_RTN_HGC_HGC10 MHU-8_RTN_HGC_HGC10 Mus musculus male Right retina
## MHU-8_RTN_HGC_HGC11 MHU-8_RTN_HGC_HGC11 Mus musculus male Right retina
## MHU-8_RTN_HGC_HGC12 MHU-8_RTN_HGC_HGC12 Mus musculus male Right retina
## spacecraft acceleration_source gravity gravity_class
## <factor> <factor> <numeric> <factor>
## MHU-8_RTN_FLT_FLT02 ISS Centrifugal Acceleration 3.3e-01 0.33_G
## MHU-8_RTN_FLT_FLT03 ISS Centrifugal Acceleration 6.6e-01 0.66_G
## MHU-8_RTN_FLT_FLT04 ISS Centrifugal Acceleration 1.0e+00 1.00_G
## MHU-8_RTN_FLT_FLT05 ISS Orbital Free Fall 1.0e-06 micro_G
## MHU-8_RTN_FLT_FLT06 ISS Centrifugal Acceleration 3.3e-01 0.33_G
## ... ... ... ... ...
## MHU-8_RTN_HGC_HGC08 Earth Terrestrial Gravity 1 1.00_G
## MHU-8_RTN_HGC_HGC09 Earth Terrestrial Gravity 1 1.00_G
## MHU-8_RTN_HGC_HGC10 Earth Terrestrial Gravity 1 1.00_G
## MHU-8_RTN_HGC_HGC11 Earth Terrestrial Gravity 1 1.00_G
## MHU-8_RTN_HGC_HGC12 Earth Terrestrial Gravity 1 1.00_G
## weight_at_launch weight_at_euthanasia sizeFactor
## <numeric> <numeric> <numeric>
## MHU-8_RTN_FLT_FLT02 26.1 27.59 1.143044
## MHU-8_RTN_FLT_FLT03 26.0 24.35 1.255924
## MHU-8_RTN_FLT_FLT04 27.6 26.88 0.968085
## MHU-8_RTN_FLT_FLT05 27.3 28.15 0.731508
## MHU-8_RTN_FLT_FLT06 26.2 24.45 0.906360
## ... ... ... ...
## MHU-8_RTN_HGC_HGC08 28.0 28.00 1.189854
## MHU-8_RTN_HGC_HGC09 25.8 26.36 0.693287
## MHU-8_RTN_HGC_HGC10 25.9 26.05 1.206786
## MHU-8_RTN_HGC_HGC11 26.6 26.71 1.129952
## MHU-8_RTN_HGC_HGC12 27.3 26.60 0.832887
## replaceable
## <logical>
## MHU-8_RTN_FLT_FLT02 FALSE
## MHU-8_RTN_FLT_FLT03 FALSE
## MHU-8_RTN_FLT_FLT04 TRUE
## MHU-8_RTN_FLT_FLT05 FALSE
## MHU-8_RTN_FLT_FLT06 FALSE
## ... ...
## MHU-8_RTN_HGC_HGC08 TRUE
## MHU-8_RTN_HGC_HGC09 TRUE
## MHU-8_RTN_HGC_HGC10 TRUE
## MHU-8_RTN_HGC_HGC11 TRUE
## MHU-8_RTN_HGC_HGC12 TRUE
# Display the first rows of the raw counts
head(DESeq2::counts(de_deseq$dds))
## MHU-8_RTN_FLT_FLT02 MHU-8_RTN_FLT_FLT03 MHU-8_RTN_FLT_FLT04
## ENSMUSG00000000001 2132 2501 1979
## ENSMUSG00000000028 109 125 142
## ENSMUSG00000000031 29 27 28
## ENSMUSG00000000037 92 129 90
## ENSMUSG00000000049 22 22 9
## ENSMUSG00000000056 3614 5008 3265
## MHU-8_RTN_FLT_FLT05 MHU-8_RTN_FLT_FLT06 MHU-8_RTN_FLT_FLT07
## ENSMUSG00000000001 1065 2771 1437
## ENSMUSG00000000028 60 76 99
## ENSMUSG00000000031 10 56 24
## ENSMUSG00000000037 60 128 50
## ENSMUSG00000000049 11 4 6
## ENSMUSG00000000056 1680 2196 2152
## MHU-8_RTN_FLT_FLT08 MHU-8_RTN_FLT_FLT09 MHU-8_RTN_FLT_FLT10
## ENSMUSG00000000001 2124 2749 1847
## ENSMUSG00000000028 149 111 93
## ENSMUSG00000000031 21 14 28
## ENSMUSG00000000037 80 79 78
## ENSMUSG00000000049 9 6 16
## ENSMUSG00000000056 3421 2412 2554
## MHU-8_RTN_FLT_FLT11 MHU-8_RTN_FLT_FLT12 MHU-8_RTN_FLT_FLT13
## ENSMUSG00000000001 1212 2413 1564
## ENSMUSG00000000028 121 254 102
## ENSMUSG00000000031 11 26 28
## ENSMUSG00000000037 55 93 61
## ENSMUSG00000000049 14 16 13
## ENSMUSG00000000056 2086 4128 2088
## MHU-8_RTN_FLT_FLT14 MHU-8_RTN_FLT_FLT15 MHU-8_RTN_FLT_FLT16
## ENSMUSG00000000001 2854 2259 1348
## ENSMUSG00000000028 226 182 129
## ENSMUSG00000000031 23 17 17
## ENSMUSG00000000037 65 64 61
## ENSMUSG00000000049 17 17 4
## ENSMUSG00000000056 5112 4517 2535
## MHU-8_RTN_FLT_FLT17 MHU-8_RTN_FLT_FLT18 MHU-8_RTN_FLT_FLT19
## ENSMUSG00000000001 1761 1915 1633
## ENSMUSG00000000028 133 186 132
## ENSMUSG00000000031 517 8 12
## ENSMUSG00000000037 79 59 52
## ENSMUSG00000000049 6 8 12
## ENSMUSG00000000056 3209 3496 3246
## MHU-8_RTN_FLT_FLT20 MHU-8_RTN_FLT_FLT21 MHU-8_RTN_FLT_FLT22
## ENSMUSG00000000001 1674 1823 2155
## ENSMUSG00000000028 194 207 173
## ENSMUSG00000000031 55 13 17
## ENSMUSG00000000037 58 71 62
## ENSMUSG00000000049 19 13 20
## ENSMUSG00000000056 3558 4088 3520
## MHU-8_RTN_FLT_FLT23 MHU-8_RTN_FLT_FLT24 MHU-8_RTN_HGC_HGC01
## ENSMUSG00000000001 1746 1472 1853
## ENSMUSG00000000028 188 132 137
## ENSMUSG00000000031 20 6298 33
## ENSMUSG00000000037 59 79 49
## ENSMUSG00000000049 15 4 3
## ENSMUSG00000000056 3726 2370 3093
## MHU-8_RTN_HGC_HGC02 MHU-8_RTN_HGC_HGC03 MHU-8_RTN_HGC_HGC04
## ENSMUSG00000000001 2207 2056 1465
## ENSMUSG00000000028 173 186 132
## ENSMUSG00000000031 59 36 9
## ENSMUSG00000000037 76 70 54
## ENSMUSG00000000049 7 8 1
## ENSMUSG00000000056 3697 3431 2485
## MHU-8_RTN_HGC_HGC05 MHU-8_RTN_HGC_HGC06 MHU-8_RTN_HGC_HGC07
## ENSMUSG00000000001 2140 1824 1614
## ENSMUSG00000000028 207 171 182
## ENSMUSG00000000031 17 41 19
## ENSMUSG00000000037 72 61 52
## ENSMUSG00000000049 11 7 7
## ENSMUSG00000000056 3788 3489 2806
## MHU-8_RTN_HGC_HGC08 MHU-8_RTN_HGC_HGC09 MHU-8_RTN_HGC_HGC10
## ENSMUSG00000000001 1891 1153 2424
## ENSMUSG00000000028 199 132 267
## ENSMUSG00000000031 68 11 32
## ENSMUSG00000000037 67 55 73
## ENSMUSG00000000049 9 6 4
## ENSMUSG00000000056 3809 2176 4045
## MHU-8_RTN_HGC_HGC11 MHU-8_RTN_HGC_HGC12
## ENSMUSG00000000001 2119 1218
## ENSMUSG00000000028 228 156
## ENSMUSG00000000031 35 25
## ENSMUSG00000000037 54 55
## ENSMUSG00000000049 4 1
## ENSMUSG00000000056 3652 2793
# Display the first rows of the normalised counts to compare with raw counts
head(DESeq2::counts(de_deseq$dds, normalized = TRUE))
## MHU-8_RTN_FLT_FLT02 MHU-8_RTN_FLT_FLT03 MHU-8_RTN_FLT_FLT04
## ENSMUSG00000000001 1865.19583 1991.36188 2044.242473
## ENSMUSG00000000028 95.35945 99.52828 146.681370
## ENSMUSG00000000031 25.37086 21.49811 28.923087
## ENSMUSG00000000037 80.48687 102.71319 92.967065
## ENSMUSG00000000049 19.24686 17.51698 9.296707
## ENSMUSG00000000056 3161.73439 3987.50112 3372.638542
## MHU-8_RTN_FLT_FLT05 MHU-8_RTN_FLT_FLT06 MHU-8_RTN_FLT_FLT07
## ENSMUSG00000000001 1455.89645 3057.282789 1867.260660
## ENSMUSG00000000028 82.02234 83.851856 128.642175
## ENSMUSG00000000031 13.67039 61.785578 31.185982
## ENSMUSG00000000037 82.02234 141.224178 64.970795
## ENSMUSG00000000049 15.03743 4.413256 7.796495
## ENSMUSG00000000056 2296.62539 2422.877303 2796.343034
## MHU-8_RTN_FLT_FLT08 MHU-8_RTN_FLT_FLT09 MHU-8_RTN_FLT_FLT10
## ENSMUSG00000000001 2047.416339 2836.616800 1843.37626
## ENSMUSG00000000028 143.627606 114.537819 92.81754
## ENSMUSG00000000031 20.242817 14.446211 27.94507
## ENSMUSG00000000037 77.115493 81.517907 77.84697
## ENSMUSG00000000049 8.675493 6.191233 15.96861
## ENSMUSG00000000056 3297.651269 2488.875854 2548.98915
## MHU-8_RTN_FLT_FLT11 MHU-8_RTN_FLT_FLT12 MHU-8_RTN_FLT_FLT13
## ENSMUSG00000000001 1587.30895 1907.57546 1821.02624
## ENSMUSG00000000028 158.46896 200.79742 118.76258
## ENSMUSG00000000031 14.40627 20.55407 32.60149
## ENSMUSG00000000037 72.03135 73.52031 71.02468
## ENSMUSG00000000049 18.33525 12.64866 15.13641
## ENSMUSG00000000056 2731.95254 3263.35329 2431.13989
## MHU-8_RTN_FLT_FLT14 MHU-8_RTN_FLT_FLT15 MHU-8_RTN_FLT_FLT16
## ENSMUSG00000000001 2073.55587 1826.65007 1661.578318
## ENSMUSG00000000028 164.19889 147.16703 159.008608
## ENSMUSG00000000031 16.71051 13.74637 20.954623
## ENSMUSG00000000037 47.22534 51.75104 75.190117
## ENSMUSG00000000049 12.35124 13.74637 4.930499
## ENSMUSG00000000056 3714.09166 3652.49154 3124.704032
## MHU-8_RTN_FLT_FLT17 MHU-8_RTN_FLT_FLT18 MHU-8_RTN_FLT_FLT19
## ENSMUSG00000000001 1643.168889 1822.014600 1673.31612
## ENSMUSG00000000028 124.100774 176.968520 135.25887
## ENSMUSG00000000031 482.406767 7.611549 12.29626
## ENSMUSG00000000037 73.713993 56.135176 53.28380
## ENSMUSG00000000049 5.598531 7.611549 12.29626
## ENSMUSG00000000056 2994.281071 3326.247020 3326.13847
## MHU-8_RTN_FLT_FLT20 MHU-8_RTN_FLT_FLT21 MHU-8_RTN_FLT_FLT22
## ENSMUSG00000000001 1468.88472 1619.73692 1961.28814
## ENSMUSG00000000028 170.22917 183.91966 157.44912
## ENSMUSG00000000031 48.26085 11.55051 15.47188
## ENSMUSG00000000037 50.89326 63.08356 56.42685
## ENSMUSG00000000049 16.67193 11.55051 18.20221
## ENSMUSG00000000056 3122.03814 3632.19119 3203.58898
## MHU-8_RTN_FLT_FLT23 MHU-8_RTN_FLT_FLT24 MHU-8_RTN_HGC_HGC01
## ENSMUSG00000000001 1698.43313 1696.297093 1863.749414
## ENSMUSG00000000028 182.87825 152.113598 137.794749
## ENSMUSG00000000031 19.45513 7257.662425 33.191436
## ENSMUSG00000000037 57.39264 91.037684 49.284253
## ENSMUSG00000000049 14.59135 4.609503 3.017403
## ENSMUSG00000000056 3624.49133 2731.130509 3110.942762
## MHU-8_RTN_HGC_HGC02 MHU-8_RTN_HGC_HGC03 MHU-8_RTN_HGC_HGC04
## ENSMUSG00000000001 1744.260471 1800.374182 1759.763969
## ENSMUSG00000000028 136.727259 162.874318 158.558938
## ENSMUSG00000000031 46.629528 31.524062 10.810837
## ENSMUSG00000000037 60.065154 61.296786 64.865020
## ENSMUSG00000000049 5.532317 7.005347 1.201204
## ENSMUSG00000000056 2921.853630 3004.418200 2984.992125
## MHU-8_RTN_HGC_HGC05 MHU-8_RTN_HGC_HGC06 MHU-8_RTN_HGC_HGC07
## ENSMUSG00000000001 1829.895178 1783.974880 1831.460294
## ENSMUSG00000000028 177.003879 167.247645 206.521545
## ENSMUSG00000000031 14.536550 40.100313 21.559942
## ENSMUSG00000000037 61.566567 59.661441 59.006156
## ENSMUSG00000000049 9.406003 6.846395 7.943136
## ENSMUSG00000000056 3239.085483 3412.438792 3184.062941
## MHU-8_RTN_HGC_HGC08 MHU-8_RTN_HGC_HGC09 MHU-8_RTN_HGC_HGC10
## ENSMUSG00000000001 1589.271083 1663.09100 2008.64181
## ENSMUSG00000000028 167.247459 190.39724 221.24891
## ENSMUSG00000000031 57.149886 15.86644 26.51672
## ENSMUSG00000000037 56.309446 79.33218 60.49128
## ENSMUSG00000000049 7.563955 8.65442 3.31459
## ENSMUSG00000000056 3201.234032 3138.66958 3351.87960
## MHU-8_RTN_HGC_HGC11 MHU-8_RTN_HGC_HGC12
## ENSMUSG00000000001 1875.300777 1462.382526
## ENSMUSG00000000028 201.778470 187.300225
## ENSMUSG00000000031 30.974765 30.016062
## ENSMUSG00000000037 47.789638 66.035336
## ENSMUSG00000000049 3.539973 1.200642
## ENSMUSG00000000056 3231.995487 3353.394413
# Convert the normalised counts from the DESeq2 object to a tibble
normalised_counts <- tibble::as_tibble(DESeq2::counts(de_deseq$dds, normalized = TRUE),
rownames = "ensembl_gen_id")
head(normalised_counts)
## # A tibble: 6 × 36
## ensembl_gen_id `MHU-8_RTN_FLT_FLT02` `MHU-8_RTN_FLT_FLT03`
## <chr> <dbl> <dbl>
## 1 ENSMUSG00000000001 1865. 1991.
## 2 ENSMUSG00000000028 95.4 99.5
## 3 ENSMUSG00000000031 25.4 21.5
## 4 ENSMUSG00000000037 80.5 103.
## 5 ENSMUSG00000000049 19.2 17.5
## 6 ENSMUSG00000000056 3162. 3988.
## # ℹ 33 more variables: `MHU-8_RTN_FLT_FLT04` <dbl>,
## # `MHU-8_RTN_FLT_FLT05` <dbl>, `MHU-8_RTN_FLT_FLT06` <dbl>,
## # `MHU-8_RTN_FLT_FLT07` <dbl>, `MHU-8_RTN_FLT_FLT08` <dbl>,
## # `MHU-8_RTN_FLT_FLT09` <dbl>, `MHU-8_RTN_FLT_FLT10` <dbl>,
## # `MHU-8_RTN_FLT_FLT11` <dbl>, `MHU-8_RTN_FLT_FLT12` <dbl>,
## # `MHU-8_RTN_FLT_FLT13` <dbl>, `MHU-8_RTN_FLT_FLT14` <dbl>,
## # `MHU-8_RTN_FLT_FLT15` <dbl>, `MHU-8_RTN_FLT_FLT16` <dbl>, …
# Find the names of the estimated effects (coefficients) of the model
DESeq2::resultsNames(de_deseq$dds)
## [1] "Intercept" "gravity_class_0.33_G_vs_1.00_G"
## [3] "gravity_class_0.66_G_vs_1.00_G" "gravity_class_micro_G_vs_1.00_G"
# Extract DE results for each gravity condition vs 1.00 G
# The results function by default applies the Benjamini-Hochberg method to control FDR
de_deseq$res_033_vs_1G <- DESeq2::results(de_deseq$dds, name = "gravity_class_0.33_G_vs_1.00_G")
de_deseq$res_066_vs_1G <- DESeq2::results(de_deseq$dds, name = "gravity_class_0.66_G_vs_1.00_G")
de_deseq$res_micro_vs_1G <- DESeq2::results(de_deseq$dds, name = "gravity_class_micro_G_vs_1.00_G")
# Summarise the results:
# Shows the number of tested genes, the number up- and down-regulated (at alpha),
# and how many were excluded by multiple testing due to low counts.
DESeq2::summary(de_deseq$res_033_vs_1G)
##
## out of 37806 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 627, 1.7%
## LFC < 0 (down) : 415, 1.1%
## outliers [1] : 292, 0.77%
## low counts [2] : 13163, 35%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
DESeq2::summary(de_deseq$res_066_vs_1G)
##
## out of 37806 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 133, 0.35%
## LFC < 0 (down) : 103, 0.27%
## outliers [1] : 292, 0.77%
## low counts [2] : 10264, 27%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
DESeq2::summary(de_deseq$res_micro_vs_1G)
##
## out of 37806 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 599, 1.6%
## LFC < 0 (down) : 328, 0.87%
## outliers [1] : 292, 0.77%
## low counts [2] : 10264, 27%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Extract significant results (padj < 0.05) and convert to tibble
de_deseq$sig_033_vs_1G <-
de_deseq$res_033_vs_1G |>
tibble::as_tibble(rownames = "ensembl_gen_id") |>
dplyr::filter(!is.na(padj), padj < 0.05) |>
dplyr::arrange(padj, log2FoldChange)
de_deseq$sig_066_vs_1G <-
de_deseq$res_066_vs_1G |>
tibble::as_tibble(rownames = "ensembl_gen_id") |>
dplyr::filter(!is.na(padj), padj < 0.05) |>
dplyr::arrange(padj, log2FoldChange)
de_deseq$sig_micro_vs_1G <-
de_deseq$res_micro_vs_1G |>
tibble::as_tibble(rownames = "ensembl_gen_id") |>
dplyr::filter(!is.na(padj), padj < 0.05) |>
dplyr::arrange(padj, log2FoldChange)
# Look at the top results
head(de_deseq$sig_033_vs_1G)
## # A tibble: 6 × 7
## ensembl_gen_id baseMean log2FoldChange lfcSE stat pvalue padj
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ENSMUSG00000031167 4644. 1.93 0.186 10.4 2.52e-25 6.13e-21
## 2 ENSMUSG00000045193 11624. 1.40 0.143 9.77 1.48e-22 1.80e-18
## 3 ENSMUSG00000035711 129. -1.40 0.184 -7.59 3.11e-14 2.53e-10
## 4 ENSMUSG00000032611 89.1 -0.849 0.118 -7.17 7.57e-13 4.61e- 9
## 5 ENSMUSG00000072969 708. 0.740 0.106 6.97 3.17e-12 1.54e- 8
## 6 ENSMUSG00000081058 1303. -1.71 0.247 -6.94 3.97e-12 1.61e- 8
head(de_deseq$sig_066_vs_1G)
## # A tibble: 6 × 7
## ensembl_gen_id baseMean log2FoldChange lfcSE stat pvalue padj
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ENSMUSG00000031167 4644. 1.44 0.186 7.73 1.10e-14 2.99e-10
## 2 ENSMUSG00000045193 11624. 1.03 0.143 7.22 5.19e-13 7.07e- 9
## 3 ENSMUSG00000083405 18.1 -22.1 3.24 -6.83 8.48e-12 6.24e- 8
## 4 ENSMUSG00000067996 313. -20.4 2.98 -6.82 9.16e-12 6.24e- 8
## 5 ENSMUSG00000070392 5076. -1.05 0.174 -6.03 1.68e- 9 9.17e- 6
## 6 ENSMUSG00000066170 153. 2.04 0.357 5.72 1.09e- 8 4.97e- 5
head(de_deseq$sig_micro_vs_1G)
## # A tibble: 6 × 7
## ensembl_gen_id baseMean log2FoldChange lfcSE stat pvalue padj
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ENSMUSG00000031167 4644. 1.71 0.200 8.57 1.08e-17 2.94e-13
## 2 ENSMUSG00000045193 11624. 1.28 0.153 8.36 6.48e-17 8.83e-13
## 3 ENSMUSG00000020288 651. -0.889 0.116 -7.67 1.73e-14 1.57e-10
## 4 ENSMUSG00000085511 20.0 2.37 0.365 6.51 7.74e-11 3.52e- 7
## 5 ENSMUSG00000066170 153. 2.49 0.382 6.54 6.33e-11 3.52e- 7
## 6 ENSMUSG00000038763 63.7 2.69 0.412 6.53 6.69e-11 3.52e- 7
# List to save all the visualization plots
de_plots <- list()
# Extract only gene ids from the significant results
sig_gene_ids <- de_deseq$sig_micro_vs_1G$ensembl_gen_id
# Map between ENSEMBL gene ids and gene symbol
ensembl2symbol <- OSD758::gene_expression("long") |>
dplyr::select(ensembl_gen_id, gene_symbol) |>
dplyr::distinct()
# Get normalised counts for significant genes
sig_normalised_counts <- normalised_counts |>
dplyr::filter(ensembl_gen_id %in% sig_gene_ids) |>
dplyr::left_join(ensembl2symbol, by = "ensembl_gen_id") |>
dplyr::select(-ensembl_gen_id) |>
tibble::column_to_rownames("gene_symbol") |>
as.matrix()
# Scale each row: subtract mean and divide by SD.
# The 2 transpositions are required because, by default, scale applies to the columns.
sig_normalised_counts_scaled <- t(scale(t(sig_normalised_counts))) # scale rows, not columns
# Find min and max values to get meaningful colors in heatmaps
range(sig_normalised_counts_scaled)
## [1] -3.324583 5.724152
# Complex heatmap
de_plots$ht <- ComplexHeatmap::Heatmap(sig_normalised_counts_scaled[1:200, ],
name = "Exprs (z-score)",
column_title = "Microgravity vs Earth's Gravity (1G) | Top 200 DE genes",
cluster_columns = TRUE,
cluster_rows = TRUE,
# number of clusters in K-means to split rows
row_km = 2,
# add cluster names
row_title = c("A", "B"),
row_title_rot = 90,
row_gap = unit(2, "mm"),
# number of clusters in K-means to split columns
column_km = 3,
column_gap = unit(2, "mm"),
border = "grey",
na_col = "white",
# Color range (min and max values from sig_normalised_counts_scaled)
col = circlize::colorRamp2(c(-4, 0, 6), c("skyblue3", "white", "forestgreen")),
column_names_gp = grid::gpar(fontsize = 9),
row_names_gp = grid::gpar(fontsize = 5),
rect_gp = grid::gpar(col = "grey", lwd = 0.5))
# Print the plot
ComplexHeatmap::draw(de_plots$ht, heatmap_legend_side = "right")
# Add a column with differential expression status and add gene symbol to the results
sig_res_annot <-
de_deseq$sig_micro_vs_1G |>
dplyr::mutate(diffexpressed = case_when(
log2FoldChange > 1 & padj < 0.05 ~ 'upregulated',
log2FoldChange < -1 & padj < 0.05 ~ 'downregulated',
TRUE ~ 'not_de')) |>
dplyr::left_join(ensembl2symbol, by = "ensembl_gen_id") |>
dplyr::select(-ensembl_gen_id) |>
# add gene symbols
dplyr::relocate(gene_symbol, .before = 1L) |>
dplyr::arrange(padj, log2FoldChange)
# Create a volcano plot using ggplot2
de_plots$volcano_plot <-
ggplot(data = sig_res_annot, aes(
x = log2FoldChange,
y = -log10(padj),
col = diffexpressed))+
geom_point(size = 0.6) +
geom_text_repel(data = filter(sig_res_annot,
((abs(log2FoldChange) > log2(8)) & (padj < -log10(0.05)))),
aes(label = gene_symbol), size = 1.5, max.overlaps = Inf) +
ggtitle("DE genes micro gravity versus Earth's gravity") +
geom_vline(xintercept = c(-1, 1), col = "black", linetype = 'dashed', linewidth = 0.2) +
geom_hline(yintercept = -log10(0.05), col = "black", linetype = 'dashed', linewidth = 0.2) +
theme(plot.title = element_text(size = rel(1), hjust = 0.5),
axis.title = element_text(size = rel(1))) +
scale_color_manual(values = c("upregulated" = "red",
"downregulated" = "blue",
"not_de" = "grey")) +
labs(color = 'DE genes') +
xlim(-5, 5) + # Caution: This hides some genes
ylim(0, 7.5) + # Caution: This hides some genes
theme_light()
# Print the volcano plot
de_plots$volcano_plot
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
# Enrichment analysis (ORA)
# Create a list to save the enrichment analysis results
fun_enrich <- list()
# Prepare list of significant DE genes in descending Log2FoldChange
fun_enrich$de_genes_fc <-
de_deseq$sig_micro_vs_1G |>
dplyr::select(ensembl_gen_id, log2FoldChange) |>
dplyr::arrange(dplyr::desc(log2FoldChange))
# Run GO enrichment analysis using the enrichGO function
fun_enrich$ego <- clusterProfiler::enrichGO(
gene = fun_enrich$de_genes_fc$ensembl_gen_id, # Genes of interest
universe = ensembl2symbol$ensembl_gen_id, # Background gene set
OrgDb = org.Mm.eg.db, # Annotation database
keyType = 'ENSEMBL', # Key type for gene identifiers
readable = TRUE, # Convert gene IDs to gene names
ont = "BP", # Ontology: can be "BP", "MF", "CC", or "ALL"
pvalueCutoff = 0.05, # P-value cutoff for significance
qvalueCutoff = 0.10 # Q-value cutoff for significance
)
# Visualize the enriched GO terms
fun_enrich$dotplot <-
enrichplot::dotplot(fun_enrich$ego, showCategory = 20, title = "GO BP | Enrichment barplot")
fun_enrich$heatplot <-
enrichplot::heatplot(fun_enrich$ego, showCategory = 10,
foldChange = fun_enrich$de_genes_fc$log2FoldChange) +
ggplot2::ggtitle("GO BP | Enrichment heatplot")
fun_enrich$emapplot <-
enrichplot::emapplot(pairwise_termsim(fun_enrich$ego), showCategory = 15, layout = "nicely")
fun_enrich$cnetplot <-
enrichplot::cnetplot(fun_enrich$ego, categorySize = "pvalue", showCategory = 5,
layout = "nicely", foldChange = fun_enrich$de_genes_fc$log2FoldChange)
fun_enrich$treeplot <-
enrichplot::treeplot(enrichplot::pairwise_termsim(fun_enrich$ego),
showCategory = 20, nCluster=5, offset = rel(2)) +
ggplot2::ggtitle("GO BP | Enrichment treeplot") +
ggplot2::theme(text = element_text(size = 8))
## Warning in treeplot.enrichResult(x, ...): Use 'offset.params = list(bar_tree = your_value)' instead of 'offset'.
## The offset parameter will be removed in the next version.
## Warning in treeplot.enrichResult(x, ...): Use 'cluster.params = list(n = your_value)' instead of 'nCluster'.
## The nCluster parameter will be removed in the next version.
# Combine the enrichment plots into panels from a single figure
(fun_enrich$dotplot) |
(fun_enrich$emapplot / fun_enrich$cnetplot)
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.