library(Seurat)
library(dplyr)
library(Matrix)
library(sva)
bipolar_dge# read in the sparse matrix
bipolar_dge = readMM("data/bp_3k_cells.sparse.matrix")
# read in the gene names (row names)
gene_names = readLines("data/bp_3k_cells.genes.txt")
barcode_names = readLines("data/bp_3k_cells.barcodes.txt")
# assign row and column names for our sparse matrix
rownames(bipolar_dge) = gene_names
colnames(bipolar_dge) = barcode_names
bp = CreateSeuratObject(raw.data = bipolar_dge, min.cells = 3)
mito.genes <- grep(pattern = "^mt-", x = rownames(x = bp@data), value = TRUE)
percent.mito <- Matrix::colSums(bp@raw.data[mito.genes, ]) / Matrix::colSums(bp@data)
# AddMetaData adds columns to object@data.info, and is a great place to stash QC stats
bp <- AddMetaData(object = bp, metadata = percent.mito, col.name = "percent.mito")
# We filter out cells that have unique gene counts over 2,500 or less than 200
# Note that low.thresholds and high.thresholds are used to define a 'gate'
# -Inf and Inf should be used if you don't want a lower or upper threshold.
bp <- FilterCells(object = bp, subset.names = c("percent.mito"), low.thresholds = c(-Inf), high.thresholds = c(0.05))
bp <- FilterCells(object = bp, subset.names = c("nGene"), low.thresholds = c(500), high.thresholds = c(2000))
print(dim(bp@data))
## [1] 14490 1582
bp <- NormalizeData(object = bp, normalization.method = "LogNormalize", scale.factor = 1e4)
bp <- FindVariableGenes(object = bp, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.2, x.high.cutoff = 4, y.cutoff = 1.0)
length(x = bp@var.genes)
## [1] 667
bp <- ScaleData(object = bp)
## [1] "Scaling data matrix"
##
|
| | 0%
|
|=================================================================| 100%
bp <- RunPCA(object = bp, pc.genes = bp@var.genes, do.print = FALSE, pcs.print = 1:2, pcs.compute = 40, maxit = 500, weight.by.var = FALSE)
PCAPlot(object = bp, dim.1 = 1, dim.2 = 2)
bp <- RunTSNE(object = bp, dims.use = 1:10, do.fast = TRUE)
TSNEPlot(object = bp)
FeaturePlot(bp, features.plot=c('nUMI'))
Bipolar 5 and 6 belong to the second batch, all others are the first batch.
batchname = bp@meta.data$orig.ident
batchid = rep(1,length(batchname))
batchid[batchname=="Bipolar5"] = 2
batchid[batchname=="Bipolar6"] = 2
names(batchid) = rownames(bp@meta.data)
bp <- AddMetaData(object = bp, metadata = batchid, col.name = "batchid")
table(bp@meta.data$batchid)
##
## 1 2
## 873 709
FeaturePlot(bp, features.plot=c('batchid'))
save(bp, file = "bp_pre_batch_correct.Robj")
load("bp_pre_batch_correct.Robj") # restore it
bp@data = bp@data[Matrix::rowSums(bp@data)>0,] #after filtering cells, some genes have zero counts
bp <- ScaleData(object = bp, vars.to.regress = c("batchid"))
## [1] "Regressing out batchid"
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======== | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 19%
|
|============= | 20%
|
|============= | 21%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 28%
|
|=================== | 29%
|
|=================== | 30%
|
|==================== | 30%
|
|==================== | 31%
|
|===================== | 32%
|
|====================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 39%
|
|========================== | 40%
|
|========================== | 41%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|=================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 57%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 59%
|
|======================================= | 60%
|
|======================================= | 61%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|=========================================== | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|============================================= | 70%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|==================================================== | 81%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|======================================================== | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================= | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 99%
|
|=================================================================| 100%
## [1] "Scaling data matrix"
##
|
| | 0%
|
|=================================================================| 100%
bp <- RunPCA(object = bp, pc.genes = bp@var.genes, do.print = FALSE, pcs.compute = 40, weight.by.var = FALSE)
bp <- RunTSNE(object = bp, dims.use = 1:10, do.fast = TRUE)
FeaturePlot(bp, features.plot=c('batchid'))
load("bp_pre_batch_correct.Robj") # restore it before running combat on it
library('sva')
m = as.data.frame(as.matrix(bp@data))
m = m[rowSums(m)>0,]
com = ComBat(m, batchid, prior.plots=FALSE, par.prior=TRUE)
## Standardizing Data across genes
bp@data = Matrix(as.matrix(com))
bp = ScaleData(bp)
## [1] "Scaling data matrix"
##
|
| | 0%
|
|=================================================================| 100%
bp <- RunPCA(object = bp, pc.genes = bp@var.genes, do.print = FALSE, pcs.print = 1:2, pcs.compute = 40, weight.by.var = FALSE)
bp <- RunTSNE(object = bp, dims.use = 1:10, do.fast = TRUE)
FeaturePlot(bp, features.plot=c('batchid'))