library(Seurat)
library(SeuratObject)
A single-cell gets encapsulated into a microfluidic droplet and is accompanied by a barcoded bead.
## Method 1 - use the "/filtered_feature_bc_matrix/" folder which contains the tabulated count data.
# Point your path to the "/filtered_feature_bc_matrix/" folder within the outs "folder"
outs_data <- Read10X(data.dir = "/Users/cathal.king/Desktop/SAGC_workshop/data/cellranger/outs/outs/filtered_feature_bc_matrix/")
# Initialize the Seurat object with the raw (non-normalized data).
seurat_object <- CreateSeuratObject(counts = outs_data,
project = "awesome_single_cell",
assay = "RNA")
## Method 2 - use the .h5 file in the outs folder
# This method might require other software packages for reading a h5 file.
# counts <- Read10X_h5(filename = "/Users/cathal.king/Desktop/SAGC_workshop/data/cellranger/outs/outs/filtered_feature_bc_matrix.h5")
# seurat_object <- CreateSeuratObject(counts = counts,
# project = "awesome_single_cell")
SoupX
package. Other notable packages for this include
DecontX.SoupX paper -> https://pubmed.ncbi.nlm.nih.gov/33367645/SoupX can also conveniently return the corrected counts
within a Seurat object. Find the raw gene counts assay in the corrected
object and compare it to the imported data (un-corrected) above.DoubletFinder. However we will not have time for that step
today.Ambient soup and an empty droplet :(
library(SoupX)
# read in the 10x outs folder which contains the raw count data
sc = load10X('/Users/cathal.king/Desktop/SAGC_workshop/data/cellranger/outs/outs/')
## Loading raw count data
## Loading cell-only count data
## Loading extra analysis data where available
#
#sc = autoEstCont(sc) # automatically calculate the contamination fraction
# can manually set contamination fraction with
sc = setContaminationFraction(sc, 0.06)
# remove the background contamination from counts
out = adjustCounts(sc)
## Warning in sparseMatrix(i = out@i[w] + 1, j = out@j[w] + 1, x = out@x[w], :
## 'giveCsparse' is deprecated; setting repr="T" for you
## Expanding counts from 3 clusters to 559 cells.
# make corrected data into a seurat object
seurat_object_corrected = CreateSeuratObject(out)
## compare the assays in the corrected and non-corrected objects
# extract assay from Seurat object
counts_corrected <- as.data.frame(seurat_object_corrected@assays$RNA$counts)
head(counts_corrected)[1:5, 1:5]
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1
## Xkr4 1.981669 11.9277069 0
## Gm1992 0.000000 0.8888742 0
## Gm19938 0.000000 0.0000000 0
## Gm37381 0.000000 0.0000000 0
## Rp1 0.000000 0.0000000 0
## AAACGAATCCACAGCG-1 AAAGGTACATCACGGC-1
## Xkr4 0 0
## Gm1992 0 0
## Gm19938 0 0
## Gm37381 0 0
## Rp1 0 0
# extract assay from Seurat object
counts <- as.data.frame(seurat_object@assays$RNA$counts)
head(counts)[1:5, 1:5]
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1
## Xkr4 2 12 0
## Gm1992 0 1 0
## Gm19938 0 0 0
## Gm37381 0 0 0
## Rp1 0 0 0
## AAACGAATCCACAGCG-1 AAAGGTACATCACGGC-1
## Xkr4 0 0
## Gm1992 0 0
## Gm19938 0 0
## Gm37381 0 0
## Rp1 0 0
$ symbol.
meta-data can store all other variables in the object such as patient
data, experiment specific data, clustering, cell trajectory info
etc.A count matrix with genes as the rows and cells as the columns. Image credit: https://www.nature.com/articles/s41596-018-0073-y
# explore seurat object
seurat_object
## An object of class Seurat
## 32285 features across 559 samples within 1 assay
## Active assay: RNA (32285 features, 0 variable features)
## 2 layers present: counts, data
# genes by cells are shown with the dim function
dim(seurat_object)
## [1] 32285 559
## the meta-data of the object can be accessed with $ and @
head(seurat_object$orig.ident)
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1 AAACGAATCCACAGCG-1
## awesome_single_cell awesome_single_cell awesome_single_cell awesome_single_cell
## AAAGGTACATCACGGC-1 AAAGTGATCTGCCTCA-1
## awesome_single_cell awesome_single_cell
## Levels: awesome_single_cell
head(seurat_object$nCount_RNA)
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1 AAACGAATCCACAGCG-1
## 5601 22089 11173 11562
## AAAGGTACATCACGGC-1 AAAGTGATCTGCCTCA-1
## 1087 10788
head(seurat_object@meta.data)
## orig.ident nCount_RNA nFeature_RNA
## AAACCCAGTCTCTCTG-1 awesome_single_cell 5601 2137
## AAACCCATCCATCCGT-1 awesome_single_cell 22089 5061
## AAACGAAAGTTCTCTT-1 awesome_single_cell 11173 4027
## AAACGAATCCACAGCG-1 awesome_single_cell 11562 4047
## AAAGGTACATCACGGC-1 awesome_single_cell 1087 784
## AAAGTGATCTGCCTCA-1 awesome_single_cell 10788 3616
head(seurat_object) # show all columns in the meta-data
## orig.ident nCount_RNA nFeature_RNA
## AAACCCAGTCTCTCTG-1 awesome_single_cell 5601 2137
## AAACCCATCCATCCGT-1 awesome_single_cell 22089 5061
## AAACGAAAGTTCTCTT-1 awesome_single_cell 11173 4027
## AAACGAATCCACAGCG-1 awesome_single_cell 11562 4047
## AAAGGTACATCACGGC-1 awesome_single_cell 1087 784
## AAAGTGATCTGCCTCA-1 awesome_single_cell 10788 3616
## AACACACCAAGAGGTC-1 awesome_single_cell 501 386
## AACACACCACGGCCAT-1 awesome_single_cell 30433 6434
## AACACACCATATCTGG-1 awesome_single_cell 15635 4334
## AACAGGGAGATTAGCA-1 awesome_single_cell 14614 3808
## rownames contain genes
head(rownames(seurat_object))
## [1] "Xkr4" "Gm1992" "Gm19938" "Gm37381" "Rp1" "Sox17"
str(rownames(seurat_object))
## chr [1:32285] "Xkr4" "Gm1992" "Gm19938" "Gm37381" "Rp1" "Sox17" "Gm37587" ...
## column names contain 10x cell barcodes
head(colnames(seurat_object))
## [1] "AAACCCAGTCTCTCTG-1" "AAACCCATCCATCCGT-1" "AAACGAAAGTTCTCTT-1"
## [4] "AAACGAATCCACAGCG-1" "AAAGGTACATCACGGC-1" "AAAGTGATCTGCCTCA-1"
# the raw assay can be accessed with
Assays(seurat_object)
## [1] "RNA"
# extract assay from Seurat object
counts <- as.data.frame(seurat_object@assays$RNA$counts)
# structure of the counts data
#str(counts)
# view the top of the counts data
head(counts)[1:5, 1:5]
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1
## Xkr4 2 12 0
## Gm1992 0 1 0
## Gm19938 0 0 0
## Gm37381 0 0 0
## Rp1 0 0 0
## AAACGAATCCACAGCG-1 AAAGGTACATCACGGC-1
## Xkr4 0 0
## Gm1992 0 0
## Gm19938 0 0
## Gm37381 0 0
## Rp1 0 0
# View counts
#View(counts)
## subset single-cell object to only contain certain genes
# define genes
genes <- c("Lypd1", "Kdsr", "Psmd1", "Sp100", "Dner", "Xrcc5", "Atic")
# first check if genes are in the object / row-names
genes %in% rownames(seurat_object)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## gene names for rows and column names for cell / barcode
# subset object to only contain the listed genes
seurat_filtered <- seurat_object[genes,]
# subset object to remove the listed genes
seurat_filtered2 <- seurat_object[genes,]
## Remove only those genes from the object
counts <- as.data.frame(seurat_object@assays$RNA$counts)
counts <- counts[-(which(rownames(counts) %in% genes)),]
seu_obj_fil2 <- subset(seurat_object, features = rownames(counts))
mt (lower case) is used in the reference genome
and for human it is MT. If this is not specified exactly in
the code then the returned values will be incorrect!%MT content can measure cell stress.
# calculate % mitochondrial content for each cell
seurat_object <- PercentageFeatureSet(object = seurat_object, "^mt-", col.name = "percent_mito")
# now check the meta-data again in the Seurat object to see the calculated % MT values.
head(seurat_object)
## orig.ident nCount_RNA nFeature_RNA percent_mito
## AAACCCAGTCTCTCTG-1 awesome_single_cell 5601 2137 17.407606
## AAACCCATCCATCCGT-1 awesome_single_cell 22089 5061 9.348545
## AAACGAAAGTTCTCTT-1 awesome_single_cell 11173 4027 2.497091
## AAACGAATCCACAGCG-1 awesome_single_cell 11562 4047 4.298564
## AAAGGTACATCACGGC-1 awesome_single_cell 1087 784 2.851886
## AAAGTGATCTGCCTCA-1 awesome_single_cell 10788 3616 8.045977
## AACACACCAAGAGGTC-1 awesome_single_cell 501 386 5.788423
## AACACACCACGGCCAT-1 awesome_single_cell 30433 6434 2.651727
## AACACACCATATCTGG-1 awesome_single_cell 15635 4334 11.250400
## AACAGGGAGATTAGCA-1 awesome_single_cell 14614 3808 7.198577
# Visualize QC metrics as a violin plot
VlnPlot(object = seurat_object, features = c("nFeature_RNA", "nCount_RNA", "percent_mito"), ncol = 3)
# FeatureScatter is typically used to visualize feature-feature relationships, but can be used
# for anything calculated by the object, i.e. columns in object metadata, PC scores etc.
plot1 <- FeatureScatter(seurat_object, feature1 = "nCount_RNA", feature2 = "percent_mito") + NoLegend()
plot2 <- FeatureScatter(seurat_object, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + NoLegend()
plot1 + plot2
# filter data by removing cells
seurat_object_qc <- subset(seurat_object, subset = nFeature_RNA < 7500 & nCount_RNA < 4500 & percent_mito < 50)
# check dimensions again of filtered object. It should contain less cells.
dim(seurat_object_qc)
## [1] 32285 336
# re-plot the above violins with the filtered down data
VlnPlot(object = seurat_object_qc, features = c("nFeature_RNA", "nCount_RNA", "percent_mito"), ncol = 3)
# re-plot above scatter plots using the filtered data.
plot1 <- FeatureScatter(seurat_object_qc, feature1 = "nCount_RNA", feature2 = "percent_mito") + NoLegend()
plot2 <- FeatureScatter(seurat_object_qc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + NoLegend()
plot1 + plot2
Idents() function.# check current levels and idents
levels(seurat_object)
## [1] "awesome_single_cell"
head(Idents(seurat_object))
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1 AAACGAATCCACAGCG-1
## awesome_single_cell awesome_single_cell awesome_single_cell awesome_single_cell
## AAAGGTACATCACGGC-1 AAAGTGATCTGCCTCA-1
## awesome_single_cell awesome_single_cell
## Levels: awesome_single_cell