library(scran) # for scDE
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Loading required package: scuttle
library(scater) # for aggregate counts
## Loading required package: ggplot2
library(edgeR) #for De
## Loading required package: limma
##
## Attaching package: 'limma'
## The following object is masked from 'package:scater':
##
## plotMDS
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
##
## Attaching package: 'edgeR'
## The following object is masked from 'package:SingleCellExperiment':
##
## cpm
library(here) # reproducible paths
## here() starts at U:/Datastore/CMVM/scs/groups/jpriller-GROUP/scRNAseq/R-analysis-VM
project <- "fire-mice"
sce <- readRDS(here("processed", project, "sce_oligo_corrected.RDS"))
Sum the counts
summed <- aggregateAcrossCells(sce,
id=colData(sce)[,"Sample"])
# create DGElist
dge_summed <- DGEList(counts(summed), samples=colData(summed))
# filter out samples with low number of cells (with such big groups this shouldn't be a problem)
dge_summed <- dge_summed[, summed$ncells >= 10]
# Filter genes with specific function from edgeR
keep <- filterByExpr(dge_summed, group=summed$genotype)
dge_summed <- dge_summed[keep,]
summary(keep)
## Mode FALSE TRUE
## logical 6559 12274
edgeR allows to add a design matrix, with the batch as a covariate, to account for batch differences in the differential expression
dge_summed <- calcNormFactors(dge_summed)
par(mfrow=c(2,4))
for (i in seq_len(ncol(dge_summed))) {
plotMD(dge_summed, column=i)
}
plotMDS(cpm(dge_summed, log=TRUE),
col=ifelse(dge_summed$samples$genotype == "KO", "red", "blue"))
# Build teh design
# Reordering the genotype factor, treated should be second level
dge_summed$samples$genotype <- factor(dge_summed$samples$genotype, levels = c("WT", "KO"))
design <- model.matrix(~factor(chip) + factor(genotype), dge_summed$samples)
# estimate dispersions
dge_summed <- estimateDisp(dge_summed, design)
fit <- glmQLFit(dge_summed, design, robust=TRUE)
# Run DE
de_results <- glmQLFTest(fit, coef=ncol(design))
# save results
write.csv(topTags(de_results, n = 500), here("outs", project, "DE_oligo_edgeR", "de_oligo_edgeR.csv"))
saveRDS(de_results, here("processed", project, "DE_oligo_edgeR_de_results.RDS"))
Output:
LogFC is the log fold-change, which is the log difference between both groups
LogCPM are the log counts per million, which can be understood as measuring expression level.
F is the F-statistic from the quasi-likelihood F-test.
PValue is the nominal p-value derived from F without any multiple testing correction
FDR (False discovery rate) is the PValue after Benjamini-Hochberg correction. For example, the set of genes with adjusted p value less than 0.1 should contain no more than 10% false positives.