Run DoubletFinder
# split aggregated data by sample
table(mouse.filtered$sample)
##
## E3.2M.M E3.2M.F E4.2M.M E4.2M.F E3.14M.M E3.14M.F E4.14M.M E4.14M.F
## 4503 4715 3512 4209 4961 4023 4516 6969
mouse.split <- SplitObject(mouse.filtered, split.by = "sample")
# loop through samples to find doublets
for (i in 1:length(mouse.split)) {
# print the sample we are on
print(paste0("Sample ",i))
# Pre-process seurat object with standard seurat workflow
mouse.sample <- NormalizeData(mouse.split[[i]])
mouse.sample <- FindVariableFeatures(mouse.sample)
mouse.sample <- ScaleData(mouse.sample)
mouse.sample <- RunPCA(mouse.sample, nfeatures.print = 10)
# Find significant PCs
stdv <- mouse.sample[["pca"]]@stdev
sum.stdv <- sum(mouse.sample[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co2 <- sort(which((percent.stdv[1:length(percent.stdv) - 1] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = T)[1] + 1
min.pc <- min(co1, co2)
min.pc
# finish pre-processing
mouse.sample <- RunUMAP(mouse.sample, dims = 1:min.pc)
mouse.sample <- FindNeighbors(object = mouse.sample, dims = 1:min.pc)
mouse.sample <- FindClusters(object = mouse.sample, resolution = 0.1)
# pK identification (no ground-truth)
sweep.list <- paramSweep_v3(mouse.sample, PCs = 1:min.pc, num.cores = detectCores() - 1)
sweep.stats <- summarizeSweep(sweep.list)
bcmvn <- find.pK(sweep.stats)
# Optimal pK is the max of the bomodality coefficent (BCmvn) distribution
bcmvn.max <- bcmvn[which.max(bcmvn$BCmetric),]
optimal.pk <- bcmvn.max$pK
optimal.pk <- as.numeric(levels(optimal.pk))[optimal.pk]
## Homotypic doublet proportion estimate
annotations <- mouse.sample@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp.poi <- round(optimal.pk * nrow(mouse.sample@meta.data)) ## Assuming 7.5% doublet formation rate - tailor for your dataset
nExp.poi.adj <- round(nExp.poi * (1 - homotypic.prop))
# run DoubletFinder
mouse.sample <- doubletFinder_v3(seu = mouse.sample,
PCs = 1:min.pc,
pK = optimal.pk,
nExp = nExp.poi.adj)
metadata <- mouse.sample@meta.data
colnames(metadata)[15] <- "doublet_finder"
mouse.sample@meta.data <- metadata
# subset and save
mouse.singlets <- subset(mouse.sample, doublet_finder == "Singlet")
mouse.split[[i]] <- mouse.singlets
remove(mouse.singlets)
}
## [1] "Sample 1"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4715
## Number of edges: 142535
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9724
## Number of communities: 11
## Elapsed time: 0 seconds

## NULL
## [1] "Creating 1572 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Sample 2"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4503
## Number of edges: 139353
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9655
## Number of communities: 9
## Elapsed time: 0 seconds

## NULL
## [1] "Creating 1501 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Sample 3"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4023
## Number of edges: 122114
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9661
## Number of communities: 9
## Elapsed time: 0 seconds

## NULL
## [1] "Creating 1341 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Sample 4"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4961
## Number of edges: 148073
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9715
## Number of communities: 8
## Elapsed time: 0 seconds

## NULL
## [1] "Creating 1654 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Sample 5"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4209
## Number of edges: 123279
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9711
## Number of communities: 10
## Elapsed time: 0 seconds

## NULL
## [1] "Creating 1403 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Sample 6"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3512
## Number of edges: 103211
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9708
## Number of communities: 8
## Elapsed time: 0 seconds

## NULL
## [1] "Creating 1171 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Sample 7"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6969
## Number of edges: 217179
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9720
## Number of communities: 10
## Elapsed time: 0 seconds

## NULL
## [1] "Creating 2323 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Sample 8"
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4516
## Number of edges: 131695
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9688
## Number of communities: 10
## Elapsed time: 0 seconds

## NULL
## [1] "Creating 1505 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
# converge mouse.split
mouse.singlets <- merge(x = mouse.split[[1]],
y = c(mouse.split[[2]], mouse.split[[3]], mouse.split[[4]],
mouse.split[[5]], mouse.split[[6]], mouse.split[[7]],
mouse.split[[8]]),
project = "Mouse scRNAseq")
mouse.singlets
## An object of class Seurat
## 21097 features across 35214 samples within 1 assay
## Active assay: RNA (21097 features, 0 variable features)
# compare before and after doublet finder
mouse.filtered
## An object of class Seurat
## 21097 features across 37408 samples within 1 assay
## Active assay: RNA (21097 features, 0 variable features)
mouse.singlets
## An object of class Seurat
## 21097 features across 35214 samples within 1 assay
## Active assay: RNA (21097 features, 0 variable features)