R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# setwd("C:/Users/ManXiaohui/Seurat/Reproduce_Publication_ProjectsData_Integration")
# getwd()

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Seurat)
## Attaching SeuratObject
library(patchwork)

# <!-- install.packages("tidyverse") -->
# <!-- install.packages("Matrix") -->
# <!-- install.packages("RCurl") -->
# <!-- install.packages("scales") -->
# <!-- install.packages("cowplot") -->

library(Seurat)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## Registered S3 method overwritten by 'cli':
##   method     from         
##   print.boxx spatstat.geom
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.1
## v readr   1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(Matrix)
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(RCurl)
## 
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
## 
##     complete
library(scales)
## Warning: package 'scales' was built under R version 4.0.5
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(cowplot)
## Warning: package 'cowplot' was built under R version 4.0.5
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:patchwork':
## 
##     align_plots
library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:Matrix':
## 
##     expand
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:purrr':
## 
##     reduce
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
## 
##     Assays
## The following object is masked from 'package:Seurat':
## 
##     Assays
## Prepare the data

# Load the PBMC dataset

tenx.data <- Read10X(data.dir = "C:/Users/ManXiaohui/Seurat/pbmc3k_filtered_gene_bc_matrices/filtered_gene_bc_matrices/hg19")

seqwell.data <- read.table(paste0("C:/Users/ManXiaohui/Seurat/Reproduce_Publication_Projects/Data_Integration/Mouse_Human/GSM2486333_PBMC.txt"))


# Setup Seurat objects
# since both count matrices have already filtered cells, we do no additional filtering here

seqwell <- CreateSeuratObject(counts = seqwell.data,project = "seqwell", min.cells = 3, min.features = 200)
 
tenx <- CreateSeuratObject(counts = tenx.data,project = "tenx", min.cells = 3, min.features = 200)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
# NORMALIZING THE DATA
seqwell <- NormalizeData(seqwell, normalization.method = "LogNormalize", scale.factor = 10000)
tenx <- NormalizeData(tenx, normalization.method = "LogNormalize", scale.factor = 10000)

# IDENTIFICATION OF HIGHLY VARIABLE FEATURES (FEATURE SELECTION)
seqwell <- FindVariableFeatures(seqwell, selection.method = "vst", nfeatures = 2000)
tenx <- FindVariableFeatures(tenx, selection.method = "vst", nfeatures = 2000)

# Add columns "platform" for metadata 

seqwell <- AddMetaData(seqwell, metadata="seqwell", col.name="platform")

tenx <-AddMetaData(tenx, metadata="tenx", col.name="platform")

# Create list for data integration

ptlist <- list(seqwell, tenx)

ptlist
## [[1]]
## An object of class Seurat 
## 6713 features across 4296 samples within 1 assay 
## Active assay: RNA (6713 features, 2000 variable features)
## 
## [[2]]
## An object of class Seurat 
## 13714 features across 2700 samples within 1 assay 
## Active assay: RNA (13714 features, 2000 variable features)
# select features that are repeatedly variable across datasets for integration
features <- SelectIntegrationFeatures(object.list = ptlist)

## Perform integration: We then identify anchors using the FindIntegrationAnchors() function, which takes a list of Seurat objects as input, and use these anchors to integrate the two datasets together with IntegrateData().

immune.anchors <- FindIntegrationAnchors(object.list = ptlist, anchor.features = features)
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 10143 anchors
## Filtering anchors
##  Retained 3213 anchors
# this command creates an 'integrated' data assay
immune.combined <- IntegrateData(anchorset = immune.anchors)
## Merging dataset 2 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
# specify that we will perform downstream analysis on the corrected data note that the original
# unmodified data still resides in the 'RNA' assay
DefaultAssay(immune.combined) <- "integrated"

# Run the standard workflow for visualization and clustering
immune.combined <- ScaleData(immune.combined, verbose = FALSE)
immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE)
immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 10:37:52 UMAP embedding parameters a = 0.9922 b = 1.112
## 10:37:52 Read 6996 rows and found 30 numeric columns
## 10:37:52 Using Annoy for neighbor search, n_neighbors = 30
## 10:37:52 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 10:37:53 Writing NN index file to temp file C:\Users\MANXIA~1\AppData\Local\Temp\Rtmpy43vxg\file3b0c3d17e18
## 10:37:53 Searching Annoy index using 1 thread, search_k = 3000
## 10:37:55 Annoy recall = 100%
## 10:37:55 Commencing smooth kNN distance calibration using 1 thread
## 10:37:57 Initializing from normalized Laplacian + noise
## 10:37:57 Commencing optimization for 500 epochs, with 326984 positive edges
## 10:38:18 Optimization finished
immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
immune.combined <- FindClusters(immune.combined, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6996
## Number of edges: 362559
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8890
## Number of communities: 11
## Elapsed time: 1 seconds
# Visualization
# Visualization
p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "platform")
p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE)
p1 + p2

# To visualize the two conditions side-by-side, we can use the split.by argument to show each condition colored by cluster.

DimPlot(immune.combined, reduction = "umap", split.by = "platform")

## Identify conserved cell type markers
# For performing differential expression after integration, we switch back to the original data
# need to run for all clusters to find the feature marker
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 6, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 0, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 1, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 2, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 3, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 4, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 5, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 6, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 7, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 8, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 9, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 10, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->


#We can explore these marker genes for each cluster and use them to annotate our clusters as specific cell types.

FeaturePlot(immune.combined, features = c("CD3D", "SELL", "S100A4", "CD8A", "GNLY", "MS4A1","FCGR3A", "HSP90AB1", "CCR7"), min.cutoff = "q9")

immune.combined <- RenameIdents(immune.combined,`0`="CD14 Mono",`1`="CD4 Memory T", `2` ="CD4 Naive T", `3` ="CD8 T", `4` ="B", `5` ="NK", `6` ="CD16 Mono",`7`="HS_Stress", `8` ="DC") 

DimPlot(immune.combined, label = TRUE)

# The DotPlot() function with the split.by parameter can be useful for viewing conserved cell type markers across conditions, showing both the expression level and the percentage of cells in a cluster expressing any given gene. Here we plot 2-3 strong marker genes for each of our 14 clusters.


Idents(immune.combined) <- factor(Idents(immune.combined), levels = c("HSPC", "Mono/Mk Doublets","pDC", "Eryth", "Mk", "DC", "CD14 Mono", "CD16 Mono", "B Activated", "B", "CD8 T", "NK", "T activated","CD4 Naive T", "CD4 Memory T"))
markers.to.plot <- c("CD3D", "CREM", "HSPH1", "SELL", "GIMAP5", "CACYBP", "GNLY", "NKG7", "CCL5","CD8A", "MS4A1", "CD79A", "MIR155HG", "NME1", "FCGR3A", "VMO1", "CCL2", "S100A9", "HLA-DQA1", "GPR183", "PPBP", "GNG11", "TSPAN13", "IL3RA", "IGJ", "PRSS57")
# DotPlot(immune.combined, features = markers.to.plot, cols = c("blue", "red"), dot.scale = 8, split.by = "platform") + RotatedAxis()

```