nf-core/rnaseq was run using default parameters (STAR/Salmon alignment/quantification). The only changes that were made to the fork were in the configuration profile, where params.memory had to be manually removed to run on our HPC.
nextflow -bg run BarryDigby/rnaseq -profile nuig,singularity --input 'rna_seq_files.csv' --fasta 'hg18/genome.fa' --gtf 'hg18/genes.gtf' --max_cpus 16Reference hg18 files were downloaded using the most current release available in the iGenomes database. The samples.csv file supplied to nf-core/rnaseq is given below.
| sample | fastq_1 | fastq_2 | strandedness |
|---|---|---|---|
| METTL3_KD1 | /data/bdigby/leipzig/fastq/METTL3/METTL3_KD1.fastq.gz | unstranded | |
| METTL3_KD2 | /data/bdigby/leipzig/fastq/METTL3/METTL3_KD2.fastq.gz | unstranded | |
| Mock_control1 | /data/bdigby/leipzig/fastq/METTL3/Mock_control1.fastq.gz | unstranded | |
| Mock_control2 | /data/bdigby/leipzig/fastq/METTL3/Mock_control2.fastq.gz | unstranded |
library(DESeq2)
library(dplyr)
datrds <- readRDS("/data/projects/leipzig/salmon_quant/salmon.merged.gene_counts.rds")
cts <- datrds@assays@data$counts %>% mutate_if(is.numeric, round)
meta <- data.frame(row.names=colnames(cts),
"condition"=c("KD", "KD", "CTRL", "CTRL"),
"replicates"=as.factor(c("1", "2", "1", "2")))
dds <- DESeqDataSetFromMatrix(cts, colData = meta, design = replicates ~ condition)
dds$condition <- relevel(dds$condition, ref="CTRL")
dds <- DESeq(dds, quiet=T)vsd <- assay(vst(dds, blind=FALSE))
#meanSdPlot(vsd)
library(pheatmap)
library(RColorBrewer)
sampleDists <- dist(t(vsd))
sampleDistMatrix <- as.matrix(sampleDists)
colnames(sampleDistMatrix) <- rownames(meta)
pheatmap(mat=sampleDistMatrix,
show_rownames = TRUE,
cluster_cols = TRUE,
cluster_rows = TRUE,
annotation_col = meta,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colorRampPalette(rev(brewer.pal(9, "Blues")) )(255))library(PCAtools)
p <- pca(vsd, metadata = meta, center = TRUE, removeVar = 0.1)
biplot(p,
# Loadings
showLoadings = FALSE,
ntopLoadings = 5,
showLoadingsNames = TRUE,
colLoadingsNames = 'black',
colLoadingsArrows = 'black',
alphaLoadingsArrow = 0.5,
sizeLoadingsNames = 4,
boxedLoadingsNames = FALSE,
drawConnectorsLoadings = TRUE,
widthConnectorsLoadings = 1,
colConnectorsLoadings = 'green',
# color points
colby = 'condition',
colkey = c("CTRL"="royalblue", "KD"="red"),
colLegendTitle = "Condition",
shape = NULL,
shapekey = c("ER-"=1, "ER+"=3),
shapeLegendTitle = "shapes",
pointSize = 2.5,
# legends
legendPosition = "right",
legendLabSize = 10,
legendTitleSize = 10,
legendIconSize = 4,
# Encircle (does not try to force fit)
encircle = FALSE,
encircleFill = FALSE,
encircleFillKey = NULL,
encircleAlpha = 0.8,
encircleLineSize = 1,
encircleLineCol = 'black',
# ellipse
ellipse = F,
ellipseType = "t",
ellipseFill = FALSE,
ellipseFillKey = NULL,
ellipseAlpha = 0.8,
ellipseLineSize = 1,
ellipseLineCol = 'black',
# plot dims
#xlim = c(-50,50),
#ylim = c(-30,30),
hline = 0,
hlineType = c("dashed"),
hlineWidth = 0.5,
hlineCol = 'black',
vline = 0,
vlineType = c("dashed"),
vlineWidth = 0.5,
vlineCol = 'black',
gridlines.major = TRUE,
gridlines.minor = TRUE,
borderWidth = 0.75,
borderColour = 'black',
# labs
#xlab = "foo",
#ylab = "bar",
title = "METTL3 KD vs. Mock Control",
subtitle = "",
caption = "",
titleLabSize = 15,
subtitleLabSize = 10,
captionLabSize = 5,
lab = rownames(meta),
labSize = 3,
boxedLabels = T,
#selectLab = rownames(meta),
drawConnectors = TRUE,
colConnectors = "grey50",
directionConnectors = "both")library(IHW)
library(apeglm)
library(DT)
resultsNames(dds)## [1] "Intercept" "condition_KD_vs_CTRL"
res <- results(dds, filterFun=ihw, alpha=0.05, c("condition", "KD", "CTRL"))
LFC <- lfcShrink(dds = dds, res= res, coef = 2, type = "apeglm")
LFC_df <- as.data.frame(LFC)get_upregulated <- function(df){
key <- intersect(rownames(df)[which(df$log2FoldChange>=1)], rownames(df)[which(df$pvalue<=0.05)])
results <- as.data.frame((df)[which(rownames(df) %in% key),])
results <- results[order(-results$log2FoldChange),]
return(results)
}
get_downregulated <- function(df){
key <- intersect(rownames(df)[which(df$log2FoldChange<=-1)], rownames(df)[which(df$pvalue<=0.05)])
results <- as.data.frame((df)[which(rownames(df) %in% key),])
results <- results[order(results$log2FoldChange),]
return(results)
}up <- get_upregulated(LFC)
down <- get_downregulated(LFC)
library(data.table)
up_sub <- subset(up, select=c("log2FoldChange", "padj"))
DT::datatable(up_sub)down_sub <- subset(down, select=c("log2FoldChange", "padj"))
DT::datatable(down_sub)