This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
library(Seurat)
## Attaching SeuratObject
library(SeuratData)
## Registered S3 method overwritten by 'cli':
## method from
## print.boxx spatstat.geom
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## -- Installed datasets ------------------------------------- SeuratData v0.2.1 --
## v ifnb 3.1.0 v pbmc3k 3.1.4
## v panc8 3.0.2 v stxBrain 0.1.1
## -------------------------------------- Key -------------------------------------
## v Dataset loaded successfully
## > Dataset built with a newer version of Seurat than installed
## (?) Unknown version of Seurat installed
library(cowplot)
## Warning: package 'cowplot' was built under R version 4.0.5
library(patchwork)
##
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
##
## align_plots
Load the PBMC dataset
tenx.data <- Read10X(data.dir = "C:/Users/ManXiaohui/Seurat/pbmc3k_filtered_gene_bc_matrices/filtered_gene_bc_matrices/hg19")
seqwell.data <- read.table(paste0("C:/Users/ManXiaohui/Seurat/Reproduce_Publication_Projects/Data_Integration/Mouse_Human/GSM2486333_PBMC.txt"))
Setup Seurat objects since both count matrices have already filtered cells, we do no additional filtering here
seqwell <- CreateSeuratObject(counts = seqwell.data,project = "seqwell", min.cells = 3, min.features = 200)
tenx <- CreateSeuratObject(counts = tenx.data,project = "tenx", min.cells = 3, min.features = 200)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
seqwell <- NormalizeData(seqwell, normalization.method = "LogNormalize", scale.factor = 10000)
tenx <- NormalizeData(tenx, normalization.method = "LogNormalize", scale.factor = 10000)
seqwell <- FindVariableFeatures(seqwell, selection.method = "vst", nfeatures = 2000)
tenx <- FindVariableFeatures(tenx, selection.method = "vst", nfeatures = 2000)
seqwell <- AddMetaData(seqwell, metadata="seqwell", col.name="platform")
tenx <-AddMetaData(tenx, metadata="tenx", col.name="platform")
ptlist <- list(seqwell, tenx)
ptlist
## [[1]]
## An object of class Seurat
## 6713 features across 4296 samples within 1 assay
## Active assay: RNA (6713 features, 2000 variable features)
##
## [[2]]
## An object of class Seurat
## 13714 features across 2700 samples within 1 assay
## Active assay: RNA (13714 features, 2000 variable features)
features <- SelectIntegrationFeatures(object.list = ptlist)
immune.anchors <- FindIntegrationAnchors(object.list = ptlist, anchor.features = features)
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 10143 anchors
## Filtering anchors
## Retained 3213 anchors
this command creates an ‘integrated’ data assay
immune.combined <- IntegrateData(anchorset = immune.anchors)
## Merging dataset 2 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
Specify that we will perform downstream analysis on the corrected data note that the original unmodified data still resides in the ‘RNA’ assay
DefaultAssay(immune.combined) <- "integrated"
Run the standard workflow for visualization and clustering
immune.combined <- ScaleData(immune.combined, verbose = FALSE)
immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE)
immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 00:37:14 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:37:14 Read 6996 rows and found 30 numeric columns
## 00:37:14 Using Annoy for neighbor search, n_neighbors = 30
## 00:37:14 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:37:16 Writing NN index file to temp file C:\Users\MANXIA~1\AppData\Local\Temp\RtmpAtBoqr\file299079bb4a61
## 00:37:16 Searching Annoy index using 1 thread, search_k = 3000
## 00:37:20 Annoy recall = 100%
## 00:37:21 Commencing smooth kNN distance calibration using 1 thread
## 00:37:22 Initializing from normalized Laplacian + noise
## 00:37:23 Commencing optimization for 500 epochs, with 326984 positive edges
## 00:37:43 Optimization finished
immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
immune.combined <- FindClusters(immune.combined, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6996
## Number of edges: 362559
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8890
## Number of communities: 11
## Elapsed time: 1 seconds
# Visualization
p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "platform")
p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE)
p1 + p2
# To visualize the two conditions side-by-side, we can use the split.by argument to show each condition colored by cluster.
DimPlot(immune.combined, reduction = "umap", split.by = "platform")
## Identify conserved cell type markers
# For performing differential expression after integration, we switch back to the original data
# need to run for all clusters to find the feature marker
# DefaultAssay(immune.combined) <- "RNA"
# nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 6, grouping.var = "platform", verbose = FALSE)
# head(nk.markers)
#We can explore these marker genes for each cluster and use them to annotate our clusters as specific cell types.
FeaturePlot(immune.combined, features = c("CD3D", "SELL", "S100A4", "CD8A", "GNLY", "MS4A1","FCGR3A", "HSP90AB1", "CCR7"), min.cutoff = "q9")
immune.combined <- RenameIdents(immune.combined,`0`="CD14 Mono",`1`="CD4 Memory T", `2` ="CD4 Naive T", `3` ="CD8 T", `4` ="B", `5` ="NK", `6` ="CD16 Mono",`7`="HS_Stress", `8` ="DC")
DimPlot(immune.combined, label = TRUE)
# The DotPlot() function with the split.by parameter can be useful for viewing conserved cell type markers across conditions, showing both the expression level and the percentage of cells in a cluster expressing any given gene. Here we plot 2-3 strong marker genes for each of our 14 clusters.
#
#
# Idents(immune.combined) <- factor(Idents(immune.combined), levels = c("HSPC", "Mono/Mk Doublets","pDC", "Eryth", "Mk", "DC", "CD14 Mono", "CD16 Mono", "B Activated", "B", "CD8 T", "NK", "T activated","CD4 Naive T", "CD4 Memory T"))
# markers.to.plot <- c("CD3D", "CREM", "HSPH1", "SELL", "GIMAP5", "CACYBP", "GNLY", "NKG7", "CCL5","CD8A", "MS4A1", "CD79A", "MIR155HG", "NME1", "FCGR3A", "VMO1", "CCL2", "S100A9", "HLA-DQA1", "GPR183", "PPBP", "GNG11", "TSPAN13", "IL3RA", "IGJ", "PRSS57")
# DotPlot(immune.combined, features = markers.to.plot, cols = c("blue", "red"), dot.scale = 8, split.by = "platform") + RotatedAxis()