Load the following packages

library(Seurat)
library(SeuratObject)

Import single-cell data into R

A single-cell gets encapsulated into a microfluidic droplet and is accompanied by a barcoded bead.

A single-cell gets encapsulated into a microfluidic droplet and is accompanied by a barcoded bead.

## Method 1 - use the "/filtered_feature_bc_matrix/" folder which contains the tabulated count data.
# Point your path to the "/filtered_feature_bc_matrix/" folder within the outs "folder"
outs_data <- Read10X(data.dir = "/Users/cathal.king/Desktop/SAGC_workshop/data/cellranger/outs/outs/filtered_feature_bc_matrix/")
# Initialize the Seurat object with the raw (non-normalized data).
seurat_object <- CreateSeuratObject(counts = outs_data,
                                 project = "awesome_single_cell",
                                 assay = "RNA")

## Method 2 - use the .h5 file in the outs folder
# This method might require other software packages for reading a h5 file.

# counts <- Read10X_h5(filename = "/Users/cathal.king/Desktop/SAGC_workshop/data/cellranger/outs/outs/filtered_feature_bc_matrix.h5")
# seurat_object <- CreateSeuratObject(counts = counts,
#                                     project = "awesome_single_cell")

Ambient RNA in single-cell droplets

Ambient soup and an empty droplet :(

Ambient soup and an empty droplet :(

library(SoupX)
# read in the 10x outs folder which contains the raw count data
sc = load10X('/Users/cathal.king/Desktop/SAGC_workshop/data/cellranger/outs/outs/')
## Loading raw count data
## Loading cell-only count data
## Loading extra analysis data where available
# 
#sc = autoEstCont(sc) # automatically calculate the contamination fraction
# can manually set contamination fraction with
sc = setContaminationFraction(sc, 0.06)
# remove the background contamination from counts
out = adjustCounts(sc)
## Warning in sparseMatrix(i = out@i[w] + 1, j = out@j[w] + 1, x = out@x[w], :
## 'giveCsparse' is deprecated; setting repr="T" for you
## Expanding counts from 3 clusters to 559 cells.
# make corrected data into a seurat object
seurat_object_corrected = CreateSeuratObject(out)

## compare the assays in the corrected and non-corrected objects
# extract assay from Seurat object
counts_corrected <- as.data.frame(seurat_object_corrected@assays$RNA$counts)
head(counts_corrected)[1:5, 1:5]
##         AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1
## Xkr4              1.981669         11.9277069                  0
## Gm1992            0.000000          0.8888742                  0
## Gm19938           0.000000          0.0000000                  0
## Gm37381           0.000000          0.0000000                  0
## Rp1               0.000000          0.0000000                  0
##         AAACGAATCCACAGCG-1 AAAGGTACATCACGGC-1
## Xkr4                     0                  0
## Gm1992                   0                  0
## Gm19938                  0                  0
## Gm37381                  0                  0
## Rp1                      0                  0
# extract assay from Seurat object
counts <- as.data.frame(seurat_object@assays$RNA$counts)
head(counts)[1:5, 1:5]
##         AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1
## Xkr4                     2                 12                  0
## Gm1992                   0                  1                  0
## Gm19938                  0                  0                  0
## Gm37381                  0                  0                  0
## Rp1                      0                  0                  0
##         AAACGAATCCACAGCG-1 AAAGGTACATCACGGC-1
## Xkr4                     0                  0
## Gm1992                   0                  0
## Gm19938                  0                  0
## Gm37381                  0                  0
## Rp1                      0                  0

Data walk-through

A count matrix with genes as the rows and cells as the columns. *Image credit: https://www.nature.com/articles/s41596-018-0073-y*

A count matrix with genes as the rows and cells as the columns. Image credit: https://www.nature.com/articles/s41596-018-0073-y

# explore seurat object
seurat_object
## An object of class Seurat 
## 32285 features across 559 samples within 1 assay 
## Active assay: RNA (32285 features, 0 variable features)
##  2 layers present: counts, data
# genes by cells are shown with the dim function
dim(seurat_object)
## [1] 32285   559
## the meta-data of the object can be accessed with $ and @
head(seurat_object$orig.ident)
##  AAACCCAGTCTCTCTG-1  AAACCCATCCATCCGT-1  AAACGAAAGTTCTCTT-1  AAACGAATCCACAGCG-1 
## awesome_single_cell awesome_single_cell awesome_single_cell awesome_single_cell 
##  AAAGGTACATCACGGC-1  AAAGTGATCTGCCTCA-1 
## awesome_single_cell awesome_single_cell 
## Levels: awesome_single_cell
head(seurat_object$nCount_RNA)
## AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1 AAACGAATCCACAGCG-1 
##               5601              22089              11173              11562 
## AAAGGTACATCACGGC-1 AAAGTGATCTGCCTCA-1 
##               1087              10788
head(seurat_object@meta.data)
##                             orig.ident nCount_RNA nFeature_RNA
## AAACCCAGTCTCTCTG-1 awesome_single_cell       5601         2137
## AAACCCATCCATCCGT-1 awesome_single_cell      22089         5061
## AAACGAAAGTTCTCTT-1 awesome_single_cell      11173         4027
## AAACGAATCCACAGCG-1 awesome_single_cell      11562         4047
## AAAGGTACATCACGGC-1 awesome_single_cell       1087          784
## AAAGTGATCTGCCTCA-1 awesome_single_cell      10788         3616
head(seurat_object) # show all columns in the meta-data
##                             orig.ident nCount_RNA nFeature_RNA
## AAACCCAGTCTCTCTG-1 awesome_single_cell       5601         2137
## AAACCCATCCATCCGT-1 awesome_single_cell      22089         5061
## AAACGAAAGTTCTCTT-1 awesome_single_cell      11173         4027
## AAACGAATCCACAGCG-1 awesome_single_cell      11562         4047
## AAAGGTACATCACGGC-1 awesome_single_cell       1087          784
## AAAGTGATCTGCCTCA-1 awesome_single_cell      10788         3616
## AACACACCAAGAGGTC-1 awesome_single_cell        501          386
## AACACACCACGGCCAT-1 awesome_single_cell      30433         6434
## AACACACCATATCTGG-1 awesome_single_cell      15635         4334
## AACAGGGAGATTAGCA-1 awesome_single_cell      14614         3808
## rownames contain genes 
head(rownames(seurat_object))
## [1] "Xkr4"    "Gm1992"  "Gm19938" "Gm37381" "Rp1"     "Sox17"
str(rownames(seurat_object))
##  chr [1:32285] "Xkr4" "Gm1992" "Gm19938" "Gm37381" "Rp1" "Sox17" "Gm37587" ...
## column names contain 10x cell barcodes
head(colnames(seurat_object))
## [1] "AAACCCAGTCTCTCTG-1" "AAACCCATCCATCCGT-1" "AAACGAAAGTTCTCTT-1"
## [4] "AAACGAATCCACAGCG-1" "AAAGGTACATCACGGC-1" "AAAGTGATCTGCCTCA-1"

Raw gene counts

# the raw assay can be accessed with
Assays(seurat_object)
## [1] "RNA"
# extract assay from Seurat object
counts <- as.data.frame(seurat_object@assays$RNA$counts)
# structure of the counts data
#str(counts)
# view the top of the counts data
head(counts)[1:5, 1:5]
##         AAACCCAGTCTCTCTG-1 AAACCCATCCATCCGT-1 AAACGAAAGTTCTCTT-1
## Xkr4                     2                 12                  0
## Gm1992                   0                  1                  0
## Gm19938                  0                  0                  0
## Gm37381                  0                  0                  0
## Rp1                      0                  0                  0
##         AAACGAATCCACAGCG-1 AAAGGTACATCACGGC-1
## Xkr4                     0                  0
## Gm1992                   0                  0
## Gm19938                  0                  0
## Gm37381                  0                  0
## Rp1                      0                  0
# View counts
#View(counts)

Subset Seurat object based on a list of genes.

## subset single-cell object to only contain certain genes
# define genes
genes <- c("Lypd1", "Kdsr", "Psmd1", "Sp100", "Dner", "Xrcc5", "Atic")
# first check if genes are in the object / row-names
genes %in% rownames(seurat_object)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## gene names for rows and column names for cell / barcode

# subset object to only contain the listed genes
seurat_filtered <- seurat_object[genes,]
# subset object to remove the listed genes
seurat_filtered2 <- seurat_object[genes,]

## Remove only those genes from the object
counts <- as.data.frame(seurat_object@assays$RNA$counts)
counts <- counts[-(which(rownames(counts) %in% genes)),]
seu_obj_fil2 <- subset(seurat_object, features = rownames(counts))

Data QC

%MT content can measure cell stress.

%MT content can measure cell stress.

# calculate % mitochondrial content for each cell
seurat_object <- PercentageFeatureSet(object = seurat_object, "^mt-", col.name = "percent_mito")

# now check the meta-data again in the Seurat object to see the calculated % MT values.
head(seurat_object)
##                             orig.ident nCount_RNA nFeature_RNA percent_mito
## AAACCCAGTCTCTCTG-1 awesome_single_cell       5601         2137    17.407606
## AAACCCATCCATCCGT-1 awesome_single_cell      22089         5061     9.348545
## AAACGAAAGTTCTCTT-1 awesome_single_cell      11173         4027     2.497091
## AAACGAATCCACAGCG-1 awesome_single_cell      11562         4047     4.298564
## AAAGGTACATCACGGC-1 awesome_single_cell       1087          784     2.851886
## AAAGTGATCTGCCTCA-1 awesome_single_cell      10788         3616     8.045977
## AACACACCAAGAGGTC-1 awesome_single_cell        501          386     5.788423
## AACACACCACGGCCAT-1 awesome_single_cell      30433         6434     2.651727
## AACACACCATATCTGG-1 awesome_single_cell      15635         4334    11.250400
## AACAGGGAGATTAGCA-1 awesome_single_cell      14614         3808     7.198577
# Visualize QC metrics as a violin plot
VlnPlot(object = seurat_object, features = c("nFeature_RNA", "nCount_RNA", "percent_mito"), ncol = 3)

# FeatureScatter is typically used to visualize feature-feature relationships, but can be used
# for anything calculated by the object, i.e. columns in object metadata, PC scores etc.
plot1 <- FeatureScatter(seurat_object, feature1 = "nCount_RNA", feature2 = "percent_mito") + NoLegend()
plot2 <- FeatureScatter(seurat_object, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + NoLegend()
plot1 + plot2

Remove low quality cells from Seurat object.

# filter data by removing cells
seurat_object_qc <- subset(seurat_object, subset = nFeature_RNA < 7500 & nCount_RNA < 4500 & percent_mito < 50)

# check dimensions again of filtered object. It should contain less cells.
dim(seurat_object_qc)
## [1] 32285   336
# re-plot the above violins with the filtered down data
VlnPlot(object = seurat_object_qc, features = c("nFeature_RNA", "nCount_RNA", "percent_mito"), ncol = 3)

# re-plot above scatter plots using the filtered data.
plot1 <- FeatureScatter(seurat_object_qc, feature1 = "nCount_RNA", feature2 = "percent_mito") + NoLegend()
plot2 <- FeatureScatter(seurat_object_qc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + NoLegend()
plot1 + plot2

Levels and Idents are an important part of the Seurat object.

# check current levels and idents
levels(seurat_object)
## [1] "awesome_single_cell"
head(Idents(seurat_object))
##  AAACCCAGTCTCTCTG-1  AAACCCATCCATCCGT-1  AAACGAAAGTTCTCTT-1  AAACGAATCCACAGCG-1 
## awesome_single_cell awesome_single_cell awesome_single_cell awesome_single_cell 
##  AAAGGTACATCACGGC-1  AAAGTGATCTGCCTCA-1 
## awesome_single_cell awesome_single_cell 
## Levels: awesome_single_cell