Quantification

nf-core/rnaseq was run using default parameters (STAR/Salmon alignment/quantification). The only changes that were made to the fork were in the configuration profile, where params.memory had to be manually removed to run on our HPC.

nextflow -bg run BarryDigby/rnaseq -profile nuig,singularity --input 'rna_seq_files.csv' --fasta 'hg18/genome.fa' --gtf 'hg18/genes.gtf' --max_cpus 16

Reference hg18 files were downloaded using the most current release available in the iGenomes database. The samples.csv file supplied to nf-core/rnaseq is given below.

sample fastq_1 fastq_2 strandedness
METTL3_KD1 /data/bdigby/leipzig/fastq/METTL3/METTL3_KD1.fastq.gz unstranded
METTL3_KD2 /data/bdigby/leipzig/fastq/METTL3/METTL3_KD2.fastq.gz unstranded
Mock_control1 /data/bdigby/leipzig/fastq/METTL3/Mock_control1.fastq.gz unstranded
Mock_control2 /data/bdigby/leipzig/fastq/METTL3/Mock_control2.fastq.gz unstranded

QC

library(DESeq2)
library(dplyr)

datrds <- readRDS("/data/projects/leipzig/salmon_quant/salmon.merged.gene_counts.rds")

cts <- datrds@assays@data$counts %>% mutate_if(is.numeric, round)

meta <- data.frame(row.names=colnames(cts),
                   "condition"=c("KD", "KD", "CTRL", "CTRL"),
                   "replicates"=as.factor(c("1", "2", "1", "2")))

dds <- DESeqDataSetFromMatrix(cts, colData = meta, design = replicates ~ condition)
dds$condition <- relevel(dds$condition, ref="CTRL")
dds <- DESeq(dds, quiet=T)

Sample HM

vsd <- assay(vst(dds, blind=FALSE))
#meanSdPlot(vsd)
library(pheatmap)
library(RColorBrewer)
sampleDists <- dist(t(vsd))
sampleDistMatrix <- as.matrix(sampleDists)
colnames(sampleDistMatrix) <- rownames(meta)
pheatmap(mat=sampleDistMatrix,
         show_rownames = TRUE,
         cluster_cols = TRUE,
         cluster_rows = TRUE,
         annotation_col = meta,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colorRampPalette(rev(brewer.pal(9, "Blues")) )(255))

PCA

library(PCAtools)
p <- pca(vsd, metadata = meta, center = TRUE, removeVar = 0.1)

biplot(p,
       
       # Loadings
       showLoadings = FALSE,
       ntopLoadings = 5,
       showLoadingsNames = TRUE,
       colLoadingsNames = 'black',
       colLoadingsArrows = 'black',
       alphaLoadingsArrow = 0.5,
       sizeLoadingsNames = 4,
       boxedLoadingsNames = FALSE,
       drawConnectorsLoadings = TRUE,
       widthConnectorsLoadings = 1,
       colConnectorsLoadings = 'green',
       
       # color points
       colby = 'condition',
       colkey = c("CTRL"="royalblue", "KD"="red"),
       colLegendTitle = "Condition",
       shape = NULL,
       shapekey = c("ER-"=1, "ER+"=3),
       shapeLegendTitle = "shapes",
       pointSize = 2.5,
       
       # legends
       legendPosition = "right",
       legendLabSize = 10,
       legendTitleSize = 10,
       legendIconSize = 4,
       
       # Encircle (does not try to force fit)
       encircle = FALSE,
       encircleFill = FALSE,
       encircleFillKey = NULL,
       encircleAlpha = 0.8,
       encircleLineSize = 1,
       encircleLineCol = 'black',
       
       # ellipse
       ellipse = F,
       ellipseType = "t",
       ellipseFill = FALSE,
       ellipseFillKey = NULL,
       ellipseAlpha = 0.8,
       ellipseLineSize = 1,
       ellipseLineCol = 'black',
       
       # plot dims
       #xlim = c(-50,50),
       #ylim = c(-30,30),
       hline = 0,
       hlineType = c("dashed"),
       hlineWidth = 0.5,
       hlineCol = 'black',
       vline = 0,
       vlineType = c("dashed"),
       vlineWidth = 0.5,
       vlineCol = 'black',
       gridlines.major = TRUE,
       gridlines.minor = TRUE,
       borderWidth = 0.75,
       borderColour = 'black',
       
       # labs
       #xlab = "foo",
       #ylab = "bar", 
       title = "METTL3 KD vs. Mock Control",
       subtitle = "",
       caption = "",
       titleLabSize = 15,
       subtitleLabSize = 10,
       captionLabSize = 5,
       lab = rownames(meta),
       labSize = 3,
       boxedLabels = T,
       #selectLab = rownames(meta),
       drawConnectors = TRUE,
       colConnectors = "grey50",
       directionConnectors = "both")

DESeq2 Results

library(IHW)
library(apeglm)
library(DT)
resultsNames(dds)
## [1] "Intercept"            "condition_KD_vs_CTRL"
res <- results(dds, filterFun=ihw, alpha=0.05, c("condition", "KD", "CTRL"))
LFC <- lfcShrink(dds = dds, res= res, coef = 2, type = "apeglm")
LFC_df <- as.data.frame(LFC)
get_upregulated <- function(df){

    key <- intersect(rownames(df)[which(df$log2FoldChange>=1)], rownames(df)[which(df$pvalue<=0.05)])

    results <- as.data.frame((df)[which(rownames(df) %in% key),])
    results <- results[order(-results$log2FoldChange),]
    return(results)
}
get_downregulated <- function(df){

    key <- intersect(rownames(df)[which(df$log2FoldChange<=-1)], rownames(df)[which(df$pvalue<=0.05)])

    results <- as.data.frame((df)[which(rownames(df) %in% key),])
    results <- results[order(results$log2FoldChange),]
    return(results)
}

Up Regulated

up <- get_upregulated(LFC)
down <- get_downregulated(LFC)


library(data.table)
up_sub <- subset(up, select=c("log2FoldChange", "padj"))
DT::datatable(up_sub)

Down Regulated

down_sub <- subset(down, select=c("log2FoldChange", "padj"))
DT::datatable(down_sub)