RNA-Seq Analysis of Lung Reperfusion Injury

Install and load packages

library(BiocManager)
library(tximport)
library(DESeq2)
library(biomaRt)
library(pheatmap)
library(tidyverse)

Load Sample tables

sample_table <- read_csv(
  "https://raw.githubusercontent.com/sjcockell/mmb8052/main/practicals/practical_08/data/sample_table.csv"
)

sample_table

## # A tibble: 12 × 11
##    Run        GEO_ID  Group Age   Cell_type Instrument Isolate Sample_Name sex  
##    <chr>      <chr>   <chr> <chr> <chr>     <chr>      <chr>   <chr>       <chr>
##  1 SRR7457551 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 24h Re… R05_AM_All… male 
##  2 SRR7457552 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 24h Re… R06_AM_All… male 
##  3 SRR7457553 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 2h Rep… R03_AM_All… male 
##  4 SRR7457554 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 2h Rep… R04_AM_All… male 
##  5 SRR7457555 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 2h Rep… R01_AM_All… male 
##  6 SRR7457556 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 2h Rep… R02_AM_All… male 
##  7 SRR7457557 GSM324… Naive 14 w… Alveolar… NextSeq 5… Naive … N03_AM_Nai… male 
##  8 SRR7457558 GSM324… Naive 14 w… Alveolar… NextSeq 5… Naive … N04_AM_Nai… male 
##  9 SRR7457559 GSM324… Naive 14 w… Alveolar… NextSeq 5… Naive … N01_AM_Nai… male 
## 10 SRR7457560 GSM324… Naive 14 w… Alveolar… NextSeq 5… Naive … N02_AM_Nai… male 
## 11 SRR7457561 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 24h Re… R07_AM_All… male 
## 12 SRR7457562 GSM324… Allo… 14 w… Alveolar… NextSeq 5… 24h Re… R08_AM_All… male 
## # ℹ 2 more variables: STRAIN <chr>, Tissue <chr>

Build vectors of file paths

files <- pull(sample_table, Run)
files <- paste0(files, "/quant.sf")
names(files) <- pull(sample_table, Run)

head(files)

##            SRR7457551            SRR7457552            SRR7457553 
## "SRR7457551/quant.sf" "SRR7457552/quant.sf" "SRR7457553/quant.sf" 
##            SRR7457554            SRR7457555            SRR7457556 
## "SRR7457554/quant.sf" "SRR7457555/quant.sf" "SRR7457556/quant.sf"

Load Transcipts to gene mapping

gene_map <- read_csv(
  "https://github.com/sjcockell/mmb8052/raw/main/practicals/practical_08/extdata/gene_map.csv"
)

Import quantification data with tximport

txi <- tximport(files,
                type = "salmon",
                tx2gene = gene_map,
                ignoreTxVersion = TRUE
)

str(txi)

## List of 4
##  $ abundance          : num [1:55858, 1:12] 82.36 0 34.68 0 0.12 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:55858] "ENSMUSG00000000001" "ENSMUSG00000000003" "ENSMUSG00000000028" "ENSMUSG00000000031" ...
##   .. ..$ : chr [1:12] "SRR7457551" "SRR7457552" "SRR7457553" "SRR7457554" ...
##  $ counts             : num [1:55858, 1:12] 5400 0 1332 0 12 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:55858] "ENSMUSG00000000001" "ENSMUSG00000000003" "ENSMUSG00000000028" "ENSMUSG00000000031" ...
##   .. ..$ : chr [1:12] "SRR7457551" "SRR7457552" "SRR7457553" "SRR7457554" ...
##  $ length             : num [1:55858, 1:12] 3012 550 1764 1409 4592 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:55858] "ENSMUSG00000000001" "ENSMUSG00000000003" "ENSMUSG00000000028" "ENSMUSG00000000031" ...
##   .. ..$ : chr [1:12] "SRR7457551" "SRR7457552" "SRR7457553" "SRR7457554" ...
##  $ countsFromAbundance: chr "no"

Build DESeq2 data object

dds <- DESeqDataSetFromTximport(
  txi,
  colData = sample_table,
  design = ~ Group
)

dds <- estimateSizeFactors(dds)
dds <- estimateDispersions(dds)
dds <- nbinomWaldTest(dds)

plotDispEsts(dds)

rlog transformation with PCA

rld <- rlog(dds)

assay(rld)[1:5, 1:5]

##                    SRR7457551 SRR7457552 SRR7457553 SRR7457554 SRR7457555
## ENSMUSG00000000001  11.984264  12.041706  11.973296  12.011140  11.995211
## ENSMUSG00000000003   0.000000   0.000000   0.000000   0.000000   0.000000
## ENSMUSG00000000028   9.464706   9.634814   7.432353   7.619907   7.384439
## ENSMUSG00000000031   0.000000   0.000000   0.000000   0.000000   0.000000
## ENSMUSG00000000037   2.519403   2.624355   2.393059   3.554851   2.443859

plotPCA(rld, intgroup = "Group")

Sample Distance Heatmap

sample_distance <- dist(t(assay(rld)), method = "euclidean")
sample_distance_matrix <- as.matrix(sample_distance)

heatmap_annotation <- data.frame(
  group = colData(dds)[, "Group"],
  row.names = rownames(colData(dds))
)

pheatmap(
  sample_distance_matrix,
  clustering_distance_rows = sample_distance,
  clustering_distance_cols = sample_distance,
  annotation_col = heatmap_annotation
)

Volcano plot: Allo24h vs Naive

results_table <- results(dds, contrast = c("Group", "Allo24h", "Naive"))

results_tibble <- as_tibble(results_table, rownames = "ensembl_gene_id")
filtered_results <- filter(results_tibble, complete.cases(results_tibble))
filtered_results <- mutate(filtered_results, logPVal = -log10(padj))

ggplot(filtered_results, aes(x = log2FoldChange, y = logPVal)) +
  geom_point() +
  labs(
    title = "Volcano Plot: Allo24h vs Naive",
    x = "log2 Fold Change",
    y = "-log10 Adjusted p-value"
  )

Volcano plot: Allo2h vs Naive

results_table_2h <- results(dds, contrast = c("Group", "Allo2h", "Naive"))

results_tibble_2h <- as_tibble(results_table_2h, rownames = "ensembl_gene_id")
filtered_results_2h <- filter(results_tibble_2h, complete.cases(results_tibble_2h))
filtered_results_2h <- mutate(filtered_results_2h, logPVal = -log10(padj))

ggplot(filtered_results_2h, aes(x = log2FoldChange, y = logPVal)) +
  geom_point() +
  labs(
    title = "Volcano Plot: Allo2h vs Naive",
    x = "log2 Fold Change",
    y = "-log10 Adjusted p-value"
  )

#Gene Annotation(BiomaRt)

ensembl108 <- useEnsembl(biomart = "ensembl", version = 108)
ensembl108 <- useDataset("mmusculus_gene_ensembl", mart = ensembl108)

annotation <- getBM(
  attributes = c(
    'ensembl_gene_id', 'chromosome_name', 'start_position',
    'end_position', 'strand', 'gene_biotype',
    'external_gene_name', 'description'
  ),
  filters = 'ensembl_gene_id',
  values = filtered_results$ensembl_gene_id,
  mart = ensembl108
)

annot_results <- left_join(filtered_results, annotation)
annot_results <- arrange(annot_results, padj)

degs <- filter(annot_results, abs(log2FoldChange) > 1 & padj < 0.05)

Gene Biotype Summary Plot

biotype_summary <- annot_results %>%
  count(gene_biotype) %>%
  arrange(desc(n))

ggplot(biotype_summary, aes(x = reorder(gene_biotype, n), y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Distribution of Gene Biotypes Among DEGs",
    x = "Gene Biotype",
    y = "Number of DEGs"
  ) +
  theme_minimal()

Top-10 DEGs: Allo24h vs Naive

res24 <- results(dds, contrast = c("Group", "Allo24h", "Naive"))
res24_tbl <- as_tibble(res24, rownames = "ensembl_gene_id")
res24_tbl <- filter(res24_tbl, !is.na(padj))

top10 <- res24_tbl %>%
  arrange(padj) %>%
  slice(1:10)

top10_annot <- left_join(top10, annotation, by = "ensembl_gene_id")

ggplot(top10_annot, aes(
  x = reorder(external_gene_name, log2FoldChange),
  y = log2FoldChange,
  fill = log2FoldChange > 0
)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(
    values = c("steelblue", "firebrick"),
    labels = c("Downregulated", "Upregulated")
  ) +
  labs(
    title = "Top 10 DEGs: Allo24h vs Naive",
    x = "Gene",
    y = "log2 Fold Change",
    fill = "Direction"
  ) +
  theme_minimal()

RNA-Seq Analysis of Lung Reperfusion Injury

Akinjide

Install and load packages

Load Sample tables

Build vectors of file paths

Load Transcipts to gene mapping

Import quantification data with tximport

Build DESeq2 data object

rlog transformation with PCA

Sample Distance Heatmap

Volcano plot: Allo24h vs Naive

Volcano plot: Allo2h vs Naive

Gene Biotype Summary Plot

Top-10 DEGs: Allo24h vs Naive