seqwell_tenx_Integration

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# setwd("C:/Users/ManXiaohui/Seurat/Reproduce_Publication_ProjectsData_Integration")
# getwd()

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(Seurat)

## Attaching SeuratObject

library(patchwork)

# <!-- install.packages("tidyverse") -->
# <!-- install.packages("Matrix") -->
# <!-- install.packages("RCurl") -->
# <!-- install.packages("scales") -->
# <!-- install.packages("cowplot") -->

library(Seurat)
library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.0.5

## Registered S3 method overwritten by 'cli':
##   method     from         
##   print.boxx spatstat.geom

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.1
## v readr   1.4.0

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(Matrix)

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

library(RCurl)

## 
## Attaching package: 'RCurl'

## The following object is masked from 'package:tidyr':
## 
##     complete

library(scales)

## Warning: package 'scales' was built under R version 4.0.5

## 
## Attaching package: 'scales'

## The following object is masked from 'package:purrr':
## 
##     discard

## The following object is masked from 'package:readr':
## 
##     col_factor

library(cowplot)

## Warning: package 'cowplot' was built under R version 4.0.5

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:patchwork':
## 
##     align_plots

library(SingleCellExperiment)

## Loading required package: SummarizedExperiment

## Loading required package: MatrixGenerics

## Loading required package: matrixStats

## 
## Attaching package: 'matrixStats'

## The following object is masked from 'package:dplyr':
## 
##     count

## 
## Attaching package: 'MatrixGenerics'

## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars

## Loading required package: GenomicRanges

## Loading required package: stats4

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min

## Loading required package: S4Vectors

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:Matrix':
## 
##     expand

## The following object is masked from 'package:tidyr':
## 
##     expand

## The following objects are masked from 'package:dplyr':
## 
##     first, rename

## The following object is masked from 'package:base':
## 
##     expand.grid

## Loading required package: IRanges

## 
## Attaching package: 'IRanges'

## The following object is masked from 'package:purrr':
## 
##     reduce

## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice

## The following object is masked from 'package:grDevices':
## 
##     windows

## Loading required package: GenomeInfoDb

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## 
## Attaching package: 'Biobase'

## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians

## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians

## 
## Attaching package: 'SummarizedExperiment'

## The following object is masked from 'package:SeuratObject':
## 
##     Assays

## The following object is masked from 'package:Seurat':
## 
##     Assays

## Prepare the data

# Load the PBMC dataset

tenx.data <- Read10X(data.dir = "C:/Users/ManXiaohui/Seurat/pbmc3k_filtered_gene_bc_matrices/filtered_gene_bc_matrices/hg19")

seqwell.data <- read.table(paste0("C:/Users/ManXiaohui/Seurat/Reproduce_Publication_Projects/Data_Integration/Mouse_Human/GSM2486333_PBMC.txt"))


# Setup Seurat objects
# since both count matrices have already filtered cells, we do no additional filtering here

seqwell <- CreateSeuratObject(counts = seqwell.data,project = "seqwell", min.cells = 3, min.features = 200)
 
tenx <- CreateSeuratObject(counts = tenx.data,project = "tenx", min.cells = 3, min.features = 200)

## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')

# NORMALIZING THE DATA
seqwell <- NormalizeData(seqwell, normalization.method = "LogNormalize", scale.factor = 10000)
tenx <- NormalizeData(tenx, normalization.method = "LogNormalize", scale.factor = 10000)

# IDENTIFICATION OF HIGHLY VARIABLE FEATURES (FEATURE SELECTION)
seqwell <- FindVariableFeatures(seqwell, selection.method = "vst", nfeatures = 2000)
tenx <- FindVariableFeatures(tenx, selection.method = "vst", nfeatures = 2000)

# Add columns "platform" for metadata 

seqwell <- AddMetaData(seqwell, metadata="seqwell", col.name="platform")

tenx <-AddMetaData(tenx, metadata="tenx", col.name="platform")

# Create list for data integration

ptlist <- list(seqwell, tenx)

ptlist

## [[1]]
## An object of class Seurat 
## 6713 features across 4296 samples within 1 assay 
## Active assay: RNA (6713 features, 2000 variable features)
## 
## [[2]]
## An object of class Seurat 
## 13714 features across 2700 samples within 1 assay 
## Active assay: RNA (13714 features, 2000 variable features)

# select features that are repeatedly variable across datasets for integration
features <- SelectIntegrationFeatures(object.list = ptlist)

## Perform integration: We then identify anchors using the FindIntegrationAnchors() function, which takes a list of Seurat objects as input, and use these anchors to integrate the two datasets together with IntegrateData().

immune.anchors <- FindIntegrationAnchors(object.list = ptlist, anchor.features = features)

## Scaling features for provided objects

## Finding all pairwise anchors

## Running CCA

## Merging objects

## Finding neighborhoods

## Finding anchors

##  Found 10143 anchors

## Filtering anchors

##  Retained 3213 anchors

# this command creates an 'integrated' data assay
immune.combined <- IntegrateData(anchorset = immune.anchors)

## Merging dataset 2 into 1

## Extracting anchors for merged samples

## Finding integration vectors

## Finding integration vector weights

## Integrating data

# specify that we will perform downstream analysis on the corrected data note that the original
# unmodified data still resides in the 'RNA' assay
DefaultAssay(immune.combined) <- "integrated"

# Run the standard workflow for visualization and clustering
immune.combined <- ScaleData(immune.combined, verbose = FALSE)
immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE)
immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30)

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

## 10:37:52 UMAP embedding parameters a = 0.9922 b = 1.112

## 10:37:52 Read 6996 rows and found 30 numeric columns

## 10:37:52 Using Annoy for neighbor search, n_neighbors = 30

## 10:37:52 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 10:37:53 Writing NN index file to temp file C:\Users\MANXIA~1\AppData\Local\Temp\Rtmpy43vxg\file3b0c3d17e18
## 10:37:53 Searching Annoy index using 1 thread, search_k = 3000
## 10:37:55 Annoy recall = 100%
## 10:37:55 Commencing smooth kNN distance calibration using 1 thread
## 10:37:57 Initializing from normalized Laplacian + noise
## 10:37:57 Commencing optimization for 500 epochs, with 326984 positive edges
## 10:38:18 Optimization finished

immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30)

## Computing nearest neighbor graph
## Computing SNN

immune.combined <- FindClusters(immune.combined, resolution = 0.5)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6996
## Number of edges: 362559
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8890
## Number of communities: 11
## Elapsed time: 1 seconds

# Visualization
# Visualization
p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "platform")
p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE)
p1 + p2

# To visualize the two conditions side-by-side, we can use the split.by argument to show each condition colored by cluster.

DimPlot(immune.combined, reduction = "umap", split.by = "platform")

## Identify conserved cell type markers
# For performing differential expression after integration, we switch back to the original data
# need to run for all clusters to find the feature marker
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 6, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 0, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 1, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 2, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 3, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 4, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 5, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 6, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 7, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 8, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 9, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->
# 
# <!-- DefaultAssay(immune.combined) <- "RNA" -->
# <!-- nk.markers <- FindConservedMarkers(immune.combined, ident.1 = 10, grouping.var = "platform", verbose = FALSE) -->
# <!-- head(nk.markers) -->


#We can explore these marker genes for each cluster and use them to annotate our clusters as specific cell types.

FeaturePlot(immune.combined, features = c("CD3D", "SELL", "S100A4", "CD8A", "GNLY", "MS4A1","FCGR3A", "HSP90AB1", "CCR7"), min.cutoff = "q9")

immune.combined <- RenameIdents(immune.combined,`0`="CD14 Mono",`1`="CD4 Memory T", `2` ="CD4 Naive T", `3` ="CD8 T", `4` ="B", `5` ="NK", `6` ="CD16 Mono",`7`="HS_Stress", `8` ="DC") 

DimPlot(immune.combined, label = TRUE)

# The DotPlot() function with the split.by parameter can be useful for viewing conserved cell type markers across conditions, showing both the expression level and the percentage of cells in a cluster expressing any given gene. Here we plot 2-3 strong marker genes for each of our 14 clusters.


Idents(immune.combined) <- factor(Idents(immune.combined), levels = c("HSPC", "Mono/Mk Doublets","pDC", "Eryth", "Mk", "DC", "CD14 Mono", "CD16 Mono", "B Activated", "B", "CD8 T", "NK", "T activated","CD4 Naive T", "CD4 Memory T"))
markers.to.plot <- c("CD3D", "CREM", "HSPH1", "SELL", "GIMAP5", "CACYBP", "GNLY", "NKG7", "CCL5","CD8A", "MS4A1", "CD79A", "MIR155HG", "NME1", "FCGR3A", "VMO1", "CCL2", "S100A9", "HLA-DQA1", "GPR183", "PPBP", "GNG11", "TSPAN13", "IL3RA", "IGJ", "PRSS57")
# DotPlot(immune.combined, features = markers.to.plot, cols = c("blue", "red"), dot.scale = 8, split.by = "platform") + RotatedAxis()

```

seqwell_tenx_Integration

Xiaohui

4/10/2021

R Markdown

Including Plots