RNA-Seq Analysis of Lung Reperfusion Injury

Install and load packages

library(BiocManager)
library(tximport)
library(DESeq2)
library(biomaRt)
library(pheatmap)
library(tidyverse)
library(ggrepel)

Load Sample tables

sample_table <- read_csv(
  "https://raw.githubusercontent.com/sjcockell/mmb8052/main/practicals/practical_08/data/sample_table.csv"
)

sample_table

## # A tibble: 12 × 11
##    Run        GEO_ID  Group Age   Cell_type Instrument Isolate Sample_Name sex  
##    <chr>      <chr>   <chr> <chr> <chr>     <chr>      <chr>   <chr>       <chr>
##  1 SRR7457551 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 24h Re… R05_AM_All… male 
##  2 SRR7457552 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 24h Re… R06_AM_All… male 
##  3 SRR7457553 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 2h Rep… R03_AM_All… male 
##  4 SRR7457554 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 2h Rep… R04_AM_All… male 
##  5 SRR7457555 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 2h Rep… R01_AM_All… male 
##  6 SRR7457556 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 2h Rep… R02_AM_All… male 
##  7 SRR7457557 GSM324… Naive 14 w… Alveolar… NextSeq 5… Naive … N03_AM_Nai… male 
##  8 SRR7457558 GSM324… Naive 14 w… Alveolar… NextSeq 5… Naive … N04_AM_Nai… male 
##  9 SRR7457559 GSM324… Naive 14 w… Alveolar… NextSeq 5… Naive … N01_AM_Nai… male 
## 10 SRR7457560 GSM324… Naive 14 w… Alveolar… NextSeq 5… Naive … N02_AM_Nai… male 
## 11 SRR7457561 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 24h Re… R07_AM_All… male 
## 12 SRR7457562 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 24h Re… R08_AM_All… male 
## # ℹ 2 more variables: STRAIN <chr>, Tissue <chr>

Build vectors of file paths

files <- pull(sample_table, Run)
files <- paste0(files, "/quant.sf")
names(files) <- pull(sample_table, Run)

head(files)

##            SRR7457551            SRR7457552            SRR7457553 
## "SRR7457551/quant.sf" "SRR7457552/quant.sf" "SRR7457553/quant.sf" 
##            SRR7457554            SRR7457555            SRR7457556 
## "SRR7457554/quant.sf" "SRR7457555/quant.sf" "SRR7457556/quant.sf"

Load Transcipts to gene mapping

gene_map <- read_csv(
  "https://github.com/sjcockell/mmb8052/raw/main/practicals/practical_08/extdata/gene_map.csv"
)

Import quantification data with tximport

txi <- tximport(files,
                type = "salmon",
                tx2gene = gene_map,
                ignoreTxVersion = TRUE
)

str(txi)

## List of 4
##  $ abundance          : num [1:55858, 1:12] 82.36 0 34.68 0 0.12 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:55858] "ENSMUSG00000000001" "ENSMUSG00000000003" "ENSMUSG00000000028" "ENSMUSG00000000031" ...
##   .. ..$ : chr [1:12] "SRR7457551" "SRR7457552" "SRR7457553" "SRR7457554" ...
##  $ counts             : num [1:55858, 1:12] 5400 0 1332 0 12 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:55858] "ENSMUSG00000000001" "ENSMUSG00000000003" "ENSMUSG00000000028" "ENSMUSG00000000031" ...
##   .. ..$ : chr [1:12] "SRR7457551" "SRR7457552" "SRR7457553" "SRR7457554" ...
##  $ length             : num [1:55858, 1:12] 3012 550 1764 1409 4592 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:55858] "ENSMUSG00000000001" "ENSMUSG00000000003" "ENSMUSG00000000028" "ENSMUSG00000000031" ...
##   .. ..$ : chr [1:12] "SRR7457551" "SRR7457552" "SRR7457553" "SRR7457554" ...
##  $ countsFromAbundance: chr "no"

Build DESeq2 data object

dds <- DESeqDataSetFromTximport(
  txi,
  colData = sample_table,
  design = ~ Group
)

dds <- DESeq(dds)

plotDispEsts(dds)

rlog transformation with PCA

rld <- rlog(dds)

assay(rld)[1:5, 1:5]

##                    SRR7457551 SRR7457552 SRR7457553 SRR7457554 SRR7457555
## ENSMUSG00000000001  11.984264  12.041706  11.973296  12.011140  11.995211
## ENSMUSG00000000003   0.000000   0.000000   0.000000   0.000000   0.000000
## ENSMUSG00000000028   9.464706   9.634814   7.432353   7.619907   7.384439
## ENSMUSG00000000031   0.000000   0.000000   0.000000   0.000000   0.000000
## ENSMUSG00000000037   2.519403   2.624355   2.393059   3.554851   2.443859

plotPCA(rld, intgroup = "Group")

Sample Distance Heatmap

# Step 1: Compute sample-to-sample distances
sample_distance <- dist(t(assay(rld)), method = "euclidean")
sample_distance_matrix <- as.matrix(sample_distance)

# Step 2: Create annotation for sample groups
heatmap_annotation <- data.frame(
  Group = colData(dds)[, "Group"]
)
rownames(heatmap_annotation) <- rownames(colData(dds))

# Step 3: Plot heatmap with improved color and clustering
pheatmap(
  sample_distance_matrix,
  clustering_distance_rows = sample_distance,
  clustering_distance_cols = sample_distance,
  annotation_col = heatmap_annotation,
  color = colorRampPalette(c("navy", "white", "firebrick3"))(100),
  main = "Sample-to-Sample Distance Heatmap",
  fontsize = 10,
  border_color = NA
)

Volcano plot: Allo24h vs Naive

# Step 1: Prepare results
results_table <- results(dds, contrast = c("Group", "Allo24h", "Naive"))
results_tibble <- as_tibble(results_table, rownames = "ensembl_gene_id") %>%
  filter(complete.cases(.)) %>%
  mutate(
    logPVal = -log10(padj),
    regulation = case_when(
      padj < 0.05 & log2FoldChange > 1 ~ "Upregulated",
      padj < 0.05 & log2FoldChange < -1 ~ "Downregulated",
      TRUE ~ "Not Significant"
    )
  )

# Step 2: Plot volcano with color
ggplot(results_tibble, aes(x = log2FoldChange, y = logPVal, color = regulation)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_manual(values = c("Upregulated" = "red", "Downregulated" = "blue", "Not Significant" = "grey")) +
  labs(
    title = "Volcano Plot: Allo24h vs Naive",
    x = "log2 Fold Change",
    y = "-log10 Adjusted p-value",
    color = "Regulation"
  ) +
  theme_minimal(base_size = 14)

Volcano plot: Allo2h vs Naive

# Step 1: Extract and prepare results
results_table_2h <- results(dds, contrast = c("Group", "Allo2h", "Naive"))

results_tibble_2h <- as_tibble(results_table_2h, rownames = "ensembl_gene_id") %>%
  filter(complete.cases(.)) %>%
  mutate(
    logPVal = -log10(padj),
    regulation = case_when(
      padj < 0.05 & log2FoldChange > 1 ~ "Upregulated",
      padj < 0.05 & log2FoldChange < -1 ~ "Downregulated",
      TRUE ~ "Not Significant"
    )
  )

# Step 2: Plot volcano with color and theme
ggplot(results_tibble_2h, aes(x = log2FoldChange, y = logPVal, color = regulation)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_manual(values = c("Upregulated" = "red", "Downregulated" = "blue", "Not Significant" = "grey")) +
  labs(
    title = "Volcano Plot: Allo2h vs Naive",
    x = "log2 Fold Change",
    y = "-log10 Adjusted p-value",
    color = "Regulation"
  ) +
  theme_minimal(base_size = 14)

#Gene Annotation(BiomaRt)

# 1. Get DESeq2 results
res24 <- results(dds, contrast = c("Group", "Allo24h", "Naive"))

# 2. Convert to tibble and filter
res24_tbl <- as_tibble(res24, rownames = "ensembl_gene_id") %>%
  filter(!is.na(padj)) %>%
  mutate(logPVal = -log10(padj))

# 3. THIS is the corrected line — you were missing it
filtered_results <- res24_tbl

# 4. Connect to Ensembl v108
ensembl108 <- useEnsembl(biomart = "ensembl", version = 108)
ensembl108 <- useDataset("mmusculus_gene_ensembl", mart = ensembl108)

# 5. Retrieve annotation
annotation <- getBM(
  attributes = c(
    "ensembl_gene_id", "external_gene_name", "gene_biotype",
    "chromosome_name", "start_position", "end_position",
    "strand", "description"
  ),
  filters = "ensembl_gene_id",
  values = filtered_results$ensembl_gene_id,   # <-- this now works
  mart = ensembl108
)

# 6. Merge annotation
annot_results <- left_join(filtered_results, annotation, by = "ensembl_gene_id")


# 7. Extract DEGs
degs <- annot_results %>%
  filter(abs(log2FoldChange) > 1, padj < 0.05)

# 8. Select top 10 DEGs
top_genes <- degs %>%
  slice_max(abs(log2FoldChange), n = 10)

Volcano plot with upregulated and downregulated

annot_results <- annot_results %>%
  mutate(
    logPVal = -log10(padj),
    regulation = case_when(
      padj < 0.05 & log2FoldChange > 1 ~ "Upregulated",
      padj < 0.05 & log2FoldChange < -1 ~ "Downregulated",
      TRUE ~ "Not Significant"
    )
  )

top_genes <- annot_results %>%
  filter(regulation != "Not Significant") %>%
  slice_max(abs(log2FoldChange), n = 10)

 ggplot(annot_results, aes(x = log2FoldChange, y = logPVal, color = regulation)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_manual(values = c(
    "Upregulated" = "red",
    "Downregulated" = "blue",
    "Not Significant" = "grey"
  )) +
  geom_text_repel(
    data = top_genes,
    aes(label = external_gene_name),
    size = 3,
    max.overlaps = Inf,
    box.padding = 0.4,
    point.padding = 0.3,
    segment.color = "black"
  ) +
  labs(
    title = "Volcano Plot with Top DE Genes Labelled",
    x = "log2 Fold Change",
    y = "-log10 Adjusted p-value",
    color = "Regulation"
  ) +
  theme_minimal(base_size = 14)

Heatmap of DEGs for Allo2h vs Naive

# 1. DEGs for Allo2h vs Naive
res2h <- results(dds, contrast = c("Group", "Allo2h", "Naive"))
res2h_tbl <- as_tibble(res2h, rownames = "ensembl_gene_id") %>%
  filter(!is.na(padj), padj < 0.05, abs(log2FoldChange) > 1)

# 2. Extract expression matrix
rld_mat <- assay(rld)

deg_mat_2h <- rld_mat[rownames(rld_mat) %in% res2h_tbl$ensembl_gene_id, ]

# 3. Annotation
annotation_col <- data.frame(Group = colData(dds)$Group)
rownames(annotation_col) <- colnames(deg_mat_2h)

# 4. Heatmap
pheatmap(
  deg_mat_2h,
  scale = "row",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  annotation_col = annotation_col,
  color = colorRampPalette(c("navy", "white", "firebrick3"))(100),
  show_rownames = FALSE,
  main = "DEG Heatmap: Allo2h vs Naive"
)

Heatmap of DEGs for Allo24 vs Naive

# 1. DEGs for Allo24h vs Naive
res24 <- results(dds, contrast = c("Group", "Allo24h", "Naive"))
res24_tbl <- as_tibble(res24, rownames = "ensembl_gene_id") %>%
  filter(!is.na(padj), padj < 0.05, abs(log2FoldChange) > 1)

# 2. Extract expression matrix
deg_mat_24h <- rld_mat[rownames(rld_mat) %in% res24_tbl$ensembl_gene_id, ]

# 3. Annotation
annotation_col <- data.frame(Group = colData(dds)$Group)
rownames(annotation_col) <- colnames(deg_mat_24h)

# 4. Heatmap
pheatmap(
  deg_mat_24h,
  scale = "row",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  annotation_col = annotation_col,
  color = colorRampPalette(c("navy", "white", "firebrick3"))(100),
  show_rownames = FALSE,
  main = "DEG Heatmap: Allo24h vs Naive"
)

Top-10 DEGs: Allo24h vs Naive

# Top 5 upregulated (positive log2FC)
top_up <- res24_tbl %>%
  filter(padj < 0.05, log2FoldChange > 1) %>%
  arrange(padj) %>%
  slice(1:5)

# Top 5 downregulated (negative log2FC)
top_down <- res24_tbl %>%
  filter(padj < 0.05, log2FoldChange < -1) %>%
  arrange(padj) %>%
  slice(1:5)

# Combine and annotate
top10_combined <- bind_rows(top_up, top_down) %>%
  left_join(annotation, by = "ensembl_gene_id")

#plot
ggplot(top10_combined, aes(
  x = reorder(external_gene_name, log2FoldChange),
  y = log2FoldChange,
  fill = log2FoldChange > 0
)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(
    values = c("steelblue", "firebrick"),
    labels = c("Downregulated", "Upregulated")
  ) +
  labs(
    title = "Top 10 DEGs (Up & Down): Allo24h vs Naive",
    x = "Gene",
    y = "log2 Fold Change",
    fill = "Direction"
  ) +
  theme_minimal(base_size = 14)

RNA-Seq Analysis of Lung Reperfusion Injury

Akinjide

Install and load packages

Load Sample tables

Build vectors of file paths

Load Transcipts to gene mapping

Import quantification data with tximport

Build DESeq2 data object

rlog transformation with PCA

Sample Distance Heatmap

Volcano plot: Allo24h vs Naive

Volcano plot: Allo2h vs Naive

Volcano plot with upregulated and downregulated

Heatmap of DEGs for Allo2h vs Naive

Heatmap of DEGs for Allo24 vs Naive

Top-10 DEGs: Allo24h vs Naive