{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE)

```{r} # # Install the required packages if not already installed # install.packages(c(“pak”)) # # pak::pkg_install(c(“BiocManager”, “remotes”, “here”, “tidyverse”,
# “DESeq2”, “pheatmap”, “RColorBrewer”, “ggrepel”, “clusterProfiler”, # “enrichplot”, “org.Mm.eg.db”, “patchwork”, “ComplexHeatmap” # )) # # # Install the course data package # pak::pak(“patterninstitute/OSD758”)

Load packages

library(“here”) # package to find your current working directory library(“tidyverse”) # packages for data manipulation and visualization library(“DESeq2”) # differential expression analysis library(“pheatmap”) # heatmaps library(“RColorBrewer”) # color palettes library(“ggrepel”) # repel overlapping text labels in ggplot2 plots library(“clusterProfiler”) # for enrichment analysis library(“enrichplot”) # to draw functional enrichment results library(“org.Mm.eg.db”) # mouse gene annotation database library(“patchwork”) # combining multiple plots library(“ComplexHeatmap”) # to draw heatmaps

Install and load package containing the data

library(OSD758)

Gene expression in Counts

raw_counts <- OSD758::gene_expression(format = “wide”, only_expressed_genes = TRUE) # View(raw_counts)

Samples metadata

samples <- OSD758::samples() # View(samples)


```{r}
# Create a list to save the QC results
qc <- list()

# You can choose between vst() and rlog() - this tutorial uses vst.
qc$vst <- DESeq2::vst(raw_counts, blind = TRUE)

```{r} # Run PCA qc\(pca_vst <- prcomp(t(qc\)vst))

Extract the components

qc\(components <- qc\)pca_vst[[“x”]] qc\(components <- tibble::as_tibble(qc\)components, rownames = “sample_id”)

Add sample annotations to components for plot coloring

qc\(components_annot <- dplyr::left_join(qc\)components, as.data.frame(samples[, c(1,5,6,8)]), by = “sample_id”) |> dplyr::relocate(spacecraft, acceleration_source, gravity_class, .after = sample_id)

Calculate the % variance per component

qc\(pca_percent_var <- round(qc\)pca_vst\(sdev^2/sum(qc\)pca_vst$sdev^2)*100)

2D PCA | Using ggplot2

Color by gravity_class

qc\(pca_gravity <- ggplot(qc\)components_annot, aes(x = PC1, y = PC2, color = gravity_class)) + geom_point(size = 3) + labs( title = “PCA gene expression | Colored by gravity_class”, x = paste0(“PC1 (”, qc\(pca_percent_var[1], "% variance)"), y = paste0("PC2 (", qc\)pca_percent_var[2], “% variance)”) ) + theme_minimal()

Color by accelaration_source

qc\(pca_acceleration <- ggplot(qc\)components_annot, aes(x = PC1, y = PC2, color = acceleration_source)) + geom_point(size = 3) + labs( title = “PCA gene expression | Colored by acceleration_source”, x = paste0(“PC1 (”, qc\(pca_percent_var[1], "% variance)"), y = paste0("PC2 (", qc\)pca_percent_var[2], “% variance)”) ) + theme_minimal()

Color by gravity_class and shape by acceleration_source

qc\(pca_gravity_acceleration <- ggplot(qc\)components_annot, aes(x = PC1, y = PC2, color = gravity_class, shape = acceleration_source)) + geom_point(size = 3) + labs( title = “PCA gene expression | Colored by gravity_class | Shape acceleration_source”, x = paste0(“PC1 (”, qc\(pca_percent_var[1], "% variance)"), y = paste0("PC2 (", qc\)pca_percent_var[2], “% variance)”) ) + theme_minimal()

Assemble pca plots

qc\(pca_gravity_acceleration / (qc\)pca_gravity | qc$pca_acceleration)

```{r}

{r cars} summary(cars)

Including Plots

You can also embed plots, for example:

{r pressure, echo=FALSE} plot(pressure)

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.