Load the outs of the cellranger pipeline directly into R and create a seurat object from the combined contents. The outs should include a matrix file, gene file, and barcode file. This program is designed to read a specific file tree so be sure name everything in the way outlined below:
#Read in 10x data from directory that has to follow the branch structure ".../outs/filtered_gene_bc_matrices/ref/" where ref is the reference used ie mm10 or GRCh3
P10C <- Read10X(data.dir = "counts/P10C/outs/filtered_genes_bc_matrices/mm10")
P10C <- CreateSeuratObject(counts = P10C, project = "P10C")
#find percent mitochondrial reads for each cell by looking up genes with mt in them. then add to metadata
P10C$percent.mt <- PercentageFeatureSet(P10C, pattern = "^mt-")
# add metadata column describing the method for each dataset - i.e. single cells vs single nuclei
P10C$method <- "cell"
VlnPlot(P10C, features = c("nCount_RNA", "nFeature_RNA", "percent.mt"), pt.size = 0, log = TRUE, ncol = 3) # log scale
VlnPlot(P10C, features = c("nCount_RNA", "nFeature_RNA", "percent.mt"), pt.size = 0, log = FALSE, ncol = 3) # linear scale
#to see individual data points change 'pt.size' to non-zero number
#QC filters based on careful examination of graphs above. Goal is to remove outliers.
#P2 and P8 had bimodal distribution of feature (gene) counts; chose to filter most of the lower-count group. Could even go more aggressive on that.
P10C <- subset(P10C, subset = nFeature_RNA > 750 & nFeature_RNA < 5000 & percent.mt < 10 & nCount_RNA > 1500 & nCount_RNA < 3000)
See detailed info on Seurat website (https://satijalab.org/seurat/ and in this paper https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1):
# Normalization with SCtransform
# 'glmGamPoi' method improves speed of the calculation but is not strictly necessary
P10C <- SCTransform(P10C, vars.to.regress = "percent.mt", method = "glmGamPoi", verbose = FALSE)
#verbose=FALSE prevents unnecessary output during loop
P10C <- RunPCA(P10C, verbose = FALSE)
P10C <- RunUMAP(P10C, dims = 1:30, verbose = FALSE)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
P10C <- FindNeighbors(P10C, dims = 1:30, verbose = FALSE)
P10C <- FindClusters(P10C, verbose = FALSE)
#check whether 30 dimensions was an appropriate cutoff:
ElbowPlot(P10C, ndims = 50, reduction = "pca")
This was taken from Jeremy’s script (scTransform_subsetting astros & micros).
#Quality control UMAP plots
FeaturePlot(P10C, features = "nFeature_RNA", label = TRUE)
FeaturePlot(P10C, features = "nCount_SCT", label = TRUE)
#examine clusters and gene expression in UMAP plots
fplot_genes <- c("Cx3cr1", "Pdgfra", "Pax2", "Isl1", "Aqp4", "Rcvrn")
DimPlot(P10C, label = TRUE)
FeaturePlot(P10C, features = c("Cx3cr1"), cols = c("gray", "red"))
vplot_genes <- c("Csf1r", "P2ry12", "Itgam", "Sox9", "Pdgfra", "Pax2", "Gfap", "Isl1", "Crx", "Vsx2")
# c("Pdgfra", "Pax2", "Sox9", "Crip1" , "H19", "Ccnd2", "Tesc", "Epas1", "Vegfa", "Igfbp5", "Fn1", "Nnat", "Adm", "Igf2")
VlnPlot(P10C, features = vplot_genes, assay = "SCT", idents = c(0, 1, 6, 7, 8, 11, 12, 13, 14), pt.size = 0)
## Warning: Could not find Pdgfra in the default search locations, found in RNA
## assay instead
## Warning: Could not find Pax2 in the default search locations, found in RNA assay
## instead
## Warning: Could not find Gfap in the default search locations, found in RNA assay
## instead
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of rna_Pax2.
VlnPlot(P10C, features = vplot_genes, assay = "SCT", pt.size = 0)
## Warning: Could not find Pdgfra in the default search locations, found in RNA
## assay instead
## Warning: Could not find Pax2 in the default search locations, found in RNA assay
## instead
## Warning: Could not find Gfap in the default search locations, found in RNA assay
## instead
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of rna_Pax2.
# identify microglia clusters
DotPlot(P10C, features = c("Csf1r", "P2ry12", "Itgam", "Sox9", "Pdgfra", "Pax2", "Gfap", "Isl1", "Crx", "Vsx2"))
## Warning: Could not find Pdgfra in the default search locations, found in RNA
## assay instead
## Warning: Could not find Pax2 in the default search locations, found in RNA assay
## instead
## Warning: Could not find Gfap in the default search locations, found in RNA assay
## instead
FeaturePlot(P10C, features = c("Csf1r", "P2ry12", "Itgam", "Sox9", "Pdgfra", "Pax2", "Gfap", "Isl1", "Crx", "Vsx2"))
## Warning: Could not find Pdgfra in the default search locations, found in RNA
## assay instead
## Warning: Could not find Pax2 in the default search locations, found in RNA assay
## instead
## Warning: Could not find Gfap in the default search locations, found in RNA assay
## instead
## Warning in FeaturePlot(P10C, features = c("Csf1r", "P2ry12", "Itgam", "Sox9", :
## All cells have the same value (0) of rna_Pax2.
# create a new object with microglia clusters only
P10C <- subset(x = P10C, idents = c(1, 3, 5, 8))
# save subset object
saveRDS(P10C, "objects/P10_micro_subset.rds")
# rerun SCTransform on subsetted data
P10C <- SCTransform(P10C, assay = "RNA", vars.to.regress = "percent.mt", method = "glmGamPoi", verbose = FALSE)
This was taken from Jeremy’s script (scTransform_integrate astros & micros without X&Y genes).
# obtain lists of variable features
vgenes.micro <- list()
vgenes.micro <- VariableFeatures(P10C)
# assign names to the variable gene vectors.
names(vgenes.micro) <- c("P10C")
genes.to.remove <- c("Xist", "Ddx3y", "Eif2s3y")
vgenes.micro <- vgenes.micro[!vgenes.micro %in% genes.to.remove]
# write modified variable feature lists back to the Seurat objects
VariableFeatures(P10C) <- vgenes.micro