A cell gets encapsulated into a microfluidic droplet.
## Method 1 - use the "/filtered_feature_bc_matrix/" folder which contains the tabulated count data.
# Point your path to the "/filtered_feature_bc_matrix/" folder within the outs "folder"
outs_data <- Read10X(data.dir = "/Users/cathal.king/Desktop/SAGC_workshop/data/cellranger/outs/outs/filtered_feature_bc_matrix/")
# Initialize the Seurat object with the raw (non-normalized data).
seurat_object <- CreateSeuratObject(counts = outs_data,
project = "awesome_single_cell",
assay = "RNA")
## Method 2 - use the .h5 file in the outs folder
# This method might require other software packages for reading a h5 file.
# counts <- Read10X_h5(filename = "/Users/cathal.king/Desktop/SAGC_workshop/data/cellranger/outs/outs/filtered_feature_bc_matrix.h5")
# seurat_object <- CreateSeuratObject(counts = counts,
# project = "awesome_single_cell")
SoupX
package. Other notable packages for this include
DecontX.SoupX paper -> https://pubmed.ncbi.nlm.nih.gov/33367645/SoupX can also conveniently return the corrected counts
within a Seurat object. Find the raw gene counts assay in the corrected
object and compare it to the imported data (un-corrected) above.DoubletFinder. However we will not have time for that step
today.Ambient soup and an empty droplet :(
library(SoupX)
# read in the 10x outs folder which contains the raw count data
sc = load10X('/Users/cathal.king/Desktop/SAGC_workshop/data/cellranger/outs/outs/')
## Loading raw count data
## Loading cell-only count data
## Loading extra analysis data where available
#
#sc = autoEstCont(sc) # automatically calculate the contamination fraction
# can manually set contamination fraction with
sc = setContaminationFraction(sc, 0.06)
# remove the background contamination from counts
out = adjustCounts(sc)
## Warning in sparseMatrix(i = out@i[w] + 1, j = out@j[w] + 1, x = out@x[w], :
## 'giveCsparse' is deprecated; setting repr="T" for you
## Expanding counts from 3 clusters to 559 cells.
# make corrected data into a seurat object
seurat_object_corrected = CreateSeuratObject(out)
## compare the assays in the corrected and non-corrected objects
# extract assay from Seurat object
counts_corrected <- as.data.frame(seurat_object_corrected@assays$RNA$counts)
head(counts_corrected)[1:5, 1:5]
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1
## Xkr4 1.981669 11.9277069 0
## Gm1992 0.000000 0.8888742 0
## Gm19938 0.000000 0.0000000 0
## Gm37381 0.000000 0.0000000 0
## Rp1 0.000000 0.0000000 0
## AAACGAATCCACAGCG-1 AAAGGTACATCACGGC-1
## Xkr4 0 0
## Gm1992 0 0
## Gm19938 0 0
## Gm37381 0 0
## Rp1 0 0
# extract assay from Seurat object
counts <- as.data.frame(seurat_object@assays$RNA$counts)
head(counts)[1:5, 1:5]
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1
## Xkr4 2 12 0
## Gm1992 0 1 0
## Gm19938 0 0 0
## Gm37381 0 0 0
## Rp1 0 0 0
## AAACGAATCCACAGCG-1 AAAGGTACATCACGGC-1
## Xkr4 0 0
## Gm1992 0 0
## Gm19938 0 0
## Gm37381 0 0
## Rp1 0 0
$ symbol.
meta-data can store all other variables in the object such as patient
data, experiment specific data, clustering, cell trajectory info
etc.# explore seurat object
seurat_object
## An object of class Seurat
## 32285 features across 559 samples within 1 assay
## Active assay: RNA (32285 features, 0 variable features)
## 2 layers present: counts, data
# genes by cells are shown with the dim function
dim(seurat_object)
## [1] 32285 559
## the meta-data of the object can be accessed with $ and @
head(seurat_object$orig.ident)
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1 AAACGAATCCACAGCG-1
## awesome_single_cell awesome_single_cell awesome_single_cell awesome_single_cell
## AAAGGTACATCACGGC-1 AAAGTGATCTGCCTCA-1
## awesome_single_cell awesome_single_cell
## Levels: awesome_single_cell
head(seurat_object$nCount_RNA)
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1 AAACGAATCCACAGCG-1
## 5601 22089 11173 11562
## AAAGGTACATCACGGC-1 AAAGTGATCTGCCTCA-1
## 1087 10788
head(seurat_object@meta.data)
## orig.ident nCount_RNA nFeature_RNA
## AAACCCAGTCTCTCTG-1 awesome_single_cell 5601 2137
## AAACCCATCCATCCGT-1 awesome_single_cell 22089 5061
## AAACGAAAGTTCTCTT-1 awesome_single_cell 11173 4027
## AAACGAATCCACAGCG-1 awesome_single_cell 11562 4047
## AAAGGTACATCACGGC-1 awesome_single_cell 1087 784
## AAAGTGATCTGCCTCA-1 awesome_single_cell 10788 3616
head(seurat_object) # show all columns in the meta-data
## orig.ident nCount_RNA nFeature_RNA
## AAACCCAGTCTCTCTG-1 awesome_single_cell 5601 2137
## AAACCCATCCATCCGT-1 awesome_single_cell 22089 5061
## AAACGAAAGTTCTCTT-1 awesome_single_cell 11173 4027
## AAACGAATCCACAGCG-1 awesome_single_cell 11562 4047
## AAAGGTACATCACGGC-1 awesome_single_cell 1087 784
## AAAGTGATCTGCCTCA-1 awesome_single_cell 10788 3616
## AACACACCAAGAGGTC-1 awesome_single_cell 501 386
## AACACACCACGGCCAT-1 awesome_single_cell 30433 6434
## AACACACCATATCTGG-1 awesome_single_cell 15635 4334
## AACAGGGAGATTAGCA-1 awesome_single_cell 14614 3808
## rownames contain genes
head(rownames(seurat_object))
## [1] "Xkr4" "Gm1992" "Gm19938" "Gm37381" "Rp1" "Sox17"
str(rownames(seurat_object))
## chr [1:32285] "Xkr4" "Gm1992" "Gm19938" "Gm37381" "Rp1" "Sox17" "Gm37587" ...
## column names contain 10x cell barcodes
head(colnames(seurat_object))
## [1] "AAACCCAGTCTCTCTG-1" "AAACCCATCCATCCGT-1" "AAACGAAAGTTCTCTT-1"
## [4] "AAACGAATCCACAGCG-1" "AAAGGTACATCACGGC-1" "AAAGTGATCTGCCTCA-1"
# the raw assay can be accessed with
Assays(seurat_object)
## [1] "RNA"
# extract assay from Seurat object
counts <- as.data.frame(seurat_object@assays$RNA$counts)
# structure of the counts data
#str(counts)
# view the top of the counts data
head(counts)[1:5, 1:5]
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1
## Xkr4 2 12 0
## Gm1992 0 1 0
## Gm19938 0 0 0
## Gm37381 0 0 0
## Rp1 0 0 0
## AAACGAATCCACAGCG-1 AAAGGTACATCACGGC-1
## Xkr4 0 0
## Gm1992 0 0
## Gm19938 0 0
## Gm37381 0 0
## Rp1 0 0
# View counts
#View(counts)
## subset single-cell object to only contain certain genes
# define genes
genes <- c("Lypd1", "Kdsr", "Psmd1", "Sp100", "Dner", "Xrcc5", "Atic")
# first check if genes are in the object / row-names
genes %in% rownames(seurat_object)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## gene names for rows and column names for cell / barcode
# subset object to only contain the listed genes
seurat_filtered <- seurat_object[genes,]
# subset object to remove the listed genes
seurat_filtered2 <- seurat_object[genes,]
## Remove only those genes from the object
counts <- as.data.frame(seurat_object@assays$RNA$counts)
counts <- counts[-(which(rownames(counts) %in% genes)),]
seu_obj_fil2 <- subset(seurat_object, features = rownames(counts))
mt (lower case) is used in the reference genome
and for human it is MT. If this is not specified exactly in
the code then the returned values will be incorrect!%MT content can measure cell stress.
# calculate % mitochondrial content for each cell
seurat_object <- PercentageFeatureSet(object = seurat_object, "^mt-", col.name = "percent_mito")
# now check the meta-data again in the Seurat object to see the calculated % MT values.
head(seurat_object)
## orig.ident nCount_RNA nFeature_RNA percent_mito
## AAACCCAGTCTCTCTG-1 awesome_single_cell 5601 2137 17.407606
## AAACCCATCCATCCGT-1 awesome_single_cell 22089 5061 9.348545
## AAACGAAAGTTCTCTT-1 awesome_single_cell 11173 4027 2.497091
## AAACGAATCCACAGCG-1 awesome_single_cell 11562 4047 4.298564
## AAAGGTACATCACGGC-1 awesome_single_cell 1087 784 2.851886
## AAAGTGATCTGCCTCA-1 awesome_single_cell 10788 3616 8.045977
## AACACACCAAGAGGTC-1 awesome_single_cell 501 386 5.788423
## AACACACCACGGCCAT-1 awesome_single_cell 30433 6434 2.651727
## AACACACCATATCTGG-1 awesome_single_cell 15635 4334 11.250400
## AACAGGGAGATTAGCA-1 awesome_single_cell 14614 3808 7.198577
# Visualize QC metrics as a violin plot
VlnPlot(object = seurat_object, features = c("nFeature_RNA", "nCount_RNA", "percent_mito"), ncol = 3)
# FeatureScatter is typically used to visualize feature-feature relationships, but can be used
# for anything calculated by the object, i.e. columns in object metadata, PC scores etc.
plot1 <- FeatureScatter(seurat_object, feature1 = "nCount_RNA", feature2 = "percent_mito") + NoLegend()
plot2 <- FeatureScatter(seurat_object, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + NoLegend()
plot1 + plot2
# filter data by removing cells
seurat_object_qc <- subset(seurat_object, subset = nFeature_RNA < 7500 & nCount_RNA < 4500 & percent_mito < 50)
# check dimensions again of filtered object. It should contain less cells.
dim(seurat_object_qc)
## [1] 32285 336
# re-plot the above violins with the filtered down data
VlnPlot(object = seurat_object_qc, features = c("nFeature_RNA", "nCount_RNA", "percent_mito"), ncol = 3)
# re-plot above scatter plots using the filtered data.
plot1 <- FeatureScatter(seurat_object_qc, feature1 = "nCount_RNA", feature2 = "percent_mito") + NoLegend()
plot2 <- FeatureScatter(seurat_object_qc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + NoLegend()
plot1 + plot2
Idents() function.# check current levels and idents
levels(seurat_object)
## [1] "awesome_single_cell"
head(Idents(seurat_object))
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1 AAACGAATCCACAGCG-1
## awesome_single_cell awesome_single_cell awesome_single_cell awesome_single_cell
## AAAGGTACATCACGGC-1 AAAGTGATCTGCCTCA-1
## awesome_single_cell awesome_single_cell
## Levels: awesome_single_cell