Load 10x Data

Load the outs of the cellranger pipeline directly into R and create a seurat object from the combined contents. The outs should include a matrix file, gene file, and barcode file. This program is designed to read a specific file tree so be sure name everything in the way outlined below:

#Read in 10x data from directory that has to follow the branch structure ".../outs/filtered_gene_bc_matrices/ref/" where ref is the reference used ie mm10 or GRCh3
P10C <- Read10X(data.dir = "counts/P10C/outs/filtered_genes_bc_matrices/mm10")
P10C <- CreateSeuratObject(counts = P10C, project = "P10C")
#find percent mitochondrial reads for each cell by looking up genes with mt in them. then add to metadata
P10C$percent.mt <- PercentageFeatureSet(P10C, pattern = "^mt-")

# add metadata column describing the method for each dataset - i.e. single cells vs single nuclei
P10C$method <- "cell"

Filtering 1

Identify cells that fail quality control

plot for each cell: 1) UMI counts, 2) numberof detected genes, 3) percent mitochondrial genes

VlnPlot(P10C, features = c("nCount_RNA", "nFeature_RNA", "percent.mt"), pt.size = 0, log = TRUE, ncol = 3) # log scale

VlnPlot(P10C, features = c("nCount_RNA", "nFeature_RNA", "percent.mt"), pt.size = 0, log = FALSE, ncol = 3) # linear scale

#to see individual data points change 'pt.size' to non-zero number

Filtering 2

Remove cells that do not pass quality control for number of genes

#QC filters based on careful examination of graphs above. Goal is to remove outliers. 

#P2 and P8 had bimodal distribution of feature (gene) counts; chose to filter most of the lower-count group. Could even go more aggressive on that.

P10C <- subset(P10C, subset = nFeature_RNA > 750 & nFeature_RNA < 5000 & percent.mt < 10 & nCount_RNA > 1500 & nCount_RNA < 3000)

Processing individual datasets w/ SCTransform

See detailed info on Seurat website (https://satijalab.org/seurat/ and in this paper https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1):

# Normalization with SCtransform
# 'glmGamPoi' method improves speed of the calculation but is not strictly necessary
P10C <- SCTransform(P10C, vars.to.regress = "percent.mt", method = "glmGamPoi", verbose = FALSE)

#verbose=FALSE prevents unnecessary output during loop
P10C <- RunPCA(P10C, verbose = FALSE)
P10C <- RunUMAP(P10C, dims = 1:30, verbose = FALSE)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
P10C <- FindNeighbors(P10C, dims = 1:30, verbose = FALSE)
P10C <- FindClusters(P10C, verbose = FALSE)

#check whether 30 dimensions was an appropriate cutoff:
ElbowPlot(P10C, ndims = 50, reduction = "pca")

Visualization

This was taken from Jeremy’s script (scTransform_subsetting astros & micros).

#Quality control UMAP plots
FeaturePlot(P10C, features = "nFeature_RNA", label = TRUE)

FeaturePlot(P10C, features = "nCount_SCT", label = TRUE)

#examine clusters and gene expression in UMAP plots
fplot_genes <- c("Cx3cr1", "Pdgfra", "Pax2", "Isl1", "Aqp4", "Rcvrn")

DimPlot(P10C, label = TRUE)

FeaturePlot(P10C, features = c("Cx3cr1"), cols = c("gray", "red"))

vplot_genes <- c("Csf1r", "P2ry12", "Itgam", "Sox9", "Pdgfra", "Pax2", "Gfap", "Isl1", "Crx", "Vsx2")
# c("Pdgfra", "Pax2", "Sox9", "Crip1" , "H19", "Ccnd2", "Tesc", "Epas1", "Vegfa", "Igfbp5", "Fn1", "Nnat", "Adm", "Igf2")

VlnPlot(P10C, features = vplot_genes, assay = "SCT", idents = c(0, 1, 6, 7, 8, 11, 12, 13, 14), pt.size = 0)
## Warning: Could not find Pdgfra in the default search locations, found in RNA
## assay instead
## Warning: Could not find Pax2 in the default search locations, found in RNA assay
## instead
## Warning: Could not find Gfap in the default search locations, found in RNA assay
## instead
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of rna_Pax2.

VlnPlot(P10C, features = vplot_genes, assay = "SCT", pt.size = 0)
## Warning: Could not find Pdgfra in the default search locations, found in RNA
## assay instead
## Warning: Could not find Pax2 in the default search locations, found in RNA assay
## instead
## Warning: Could not find Gfap in the default search locations, found in RNA assay
## instead
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of rna_Pax2.

Subsetting microglia

# identify microglia clusters
DotPlot(P10C, features = c("Csf1r", "P2ry12", "Itgam", "Sox9", "Pdgfra", "Pax2", "Gfap", "Isl1", "Crx", "Vsx2"))
## Warning: Could not find Pdgfra in the default search locations, found in RNA
## assay instead
## Warning: Could not find Pax2 in the default search locations, found in RNA assay
## instead
## Warning: Could not find Gfap in the default search locations, found in RNA assay
## instead

FeaturePlot(P10C, features = c("Csf1r", "P2ry12", "Itgam", "Sox9", "Pdgfra", "Pax2", "Gfap", "Isl1", "Crx", "Vsx2"))
## Warning: Could not find Pdgfra in the default search locations, found in RNA
## assay instead
## Warning: Could not find Pax2 in the default search locations, found in RNA assay
## instead
## Warning: Could not find Gfap in the default search locations, found in RNA assay
## instead
## Warning in FeaturePlot(P10C, features = c("Csf1r", "P2ry12", "Itgam", "Sox9", :
## All cells have the same value (0) of rna_Pax2.

# create a new object with microglia clusters only
P10C <- subset(x = P10C, idents = c(1, 3, 5, 8))

# save subset object
saveRDS(P10C, "objects/P10_micro_subset.rds")

# rerun SCTransform on subsetted data
P10C <-  SCTransform(P10C, assay = "RNA", vars.to.regress = "percent.mt", method = "glmGamPoi", verbose = FALSE)

Remove X and Y chromosome genes from variable features

This was taken from Jeremy’s script (scTransform_integrate astros & micros without X&Y genes).

# obtain lists of variable features
vgenes.micro <- list()
vgenes.micro <- VariableFeatures(P10C)

# assign names to the variable gene vectors.
names(vgenes.micro) <- c("P10C")

genes.to.remove <- c("Xist", "Ddx3y", "Eif2s3y")
vgenes.micro <- vgenes.micro[!vgenes.micro %in% genes.to.remove]

# write modified variable feature lists back to the Seurat objects
VariableFeatures(P10C) <- vgenes.micro