Seurat Batch effect

library(Seurat)

## Attaching SeuratObject

library(SeuratData)

## ── Installed datasets ───────────────────────────────────── SeuratData v0.2.2 ──

## ✓ ifnb    3.1.0                         ✓ pbmcsca 3.0.0
## ✓ panc8   3.0.2

## ────────────────────────────────────── Key ─────────────────────────────────────

## ✓ Dataset loaded successfully
## > Dataset built with a newer version of Seurat than installed
## ❓ Unknown version of Seurat installed

library(SeuratWrappers)

# Load the saved IFNB object from an .rds file.
# NOTE(review): absolute, machine-specific path; adjust it when running elsewhere.
SO <- readRDS("/mnt/nectar_volume/home/eraz0001/new/IFNB.rds")

# Preview the first rows of the object; the printed columns are per-cell
# metadata (orig.ident, nCount_RNA, nFeature_RNA, stim, seurat_annotations).
head(SO)

##                   orig.ident nCount_RNA nFeature_RNA stim seurat_annotations
## AAACATACATTTCC.1 IMMUNE_CTRL       3017          877 CTRL          CD14 Mono
## AAACATACCAGAAA.1 IMMUNE_CTRL       2481          713 CTRL          CD14 Mono
## AAACATACCTCGCT.1 IMMUNE_CTRL       3420          850 CTRL          CD14 Mono
## AAACATACCTGGTA.1 IMMUNE_CTRL       3156         1109 CTRL                pDC
## AAACATACGATGAA.1 IMMUNE_CTRL       1868          634 CTRL       CD4 Memory T
## AAACATACGGCATT.1 IMMUNE_CTRL       1581          557 CTRL          CD14 Mono
## AAACATACTGCGTA.1 IMMUNE_CTRL       2747          980 CTRL        T activated
## AAACATACTGCTGA.1 IMMUNE_CTRL       1341          581 CTRL        CD4 Naive T
## AAACATTGAGTGTC.1 IMMUNE_CTRL       2155          880 CTRL              CD8 T
## AAACATTGCTTCGC.1 IMMUNE_CTRL       2536          669 CTRL          CD14 Mono

# Preview the last rows of the same metadata table; together with head() this
# shows that `stim` takes both CTRL and STIM values.
tail(SO)

##                   orig.ident nCount_RNA nFeature_RNA stim seurat_annotations
## TTTGACTGCCCTAC.1 IMMUNE_STIM       2403          722 STIM          CD14 Mono
## TTTGACTGCTACCC.1 IMMUNE_STIM       1978          745 STIM        CD4 Naive T
## TTTGACTGCTTGGA.1 IMMUNE_STIM       1417          510 STIM       CD4 Memory T
## TTTGACTGGCGAAG.1 IMMUNE_STIM       2205          760 STIM                  B
## TTTGACTGGGTACT.1 IMMUNE_STIM       1123          507 STIM                  B
## TTTGCATGAACGAA.1 IMMUNE_STIM       5577         1237 STIM                 DC
## TTTGCATGACGTAC.1 IMMUNE_STIM       1364          536 STIM       CD4 Memory T
## TTTGCATGCCTGTC.1 IMMUNE_STIM       2114          817 STIM                  B
## TTTGCATGCTAAGC.1 IMMUNE_STIM       1522          523 STIM        CD4 Naive T
## TTTGCATGGGACGA.1 IMMUNE_STIM       1143          503 STIM        CD4 Naive T

As you can see from the `stim` column, the cells fall into two groups: the stimulated group (STIM) and the control group (CTRL). We want to reduce the batch effect between these two groups, so we split our SO by the “stim” condition before integrating.

# Normalize the data and select variable features on the combined object,
# using each function's default arguments.
SO <- NormalizeData(SO)
SO <- FindVariableFeatures(SO)

# Split the object on the `stim` metadata column and pass the resulting list
# to RunFastMNN; the returned object replaces SO.
# NOTE(review): RunFastMNN presumably comes from SeuratWrappers (loaded above);
# the later steps read its result through reduction = "mnn".
SO <- RunFastMNN(object.list = SplitObject(SO, split.by = "stim"))

## Computing 2000 integration features

## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_

# Compute a UMAP embedding from dimensions 1-30 of the "mnn" reduction rather
# than from an uncorrected reduction.
SO <- RunUMAP(SO, reduction = "mnn", dims = 1:30)

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

## 17:43:44 UMAP embedding parameters a = 0.9922 b = 1.112

## 17:43:44 Read 13999 rows and found 30 numeric columns

## 17:43:44 Using Annoy for neighbor search, n_neighbors = 30

## 17:43:44 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 17:43:46 Writing NN index file to temp file /tmp/Rtmp7OUmWE/file2d91fb4b686ab4
## 17:43:46 Searching Annoy index using 1 thread, search_k = 3000
## 17:43:52 Annoy recall = 100%
## 17:43:52 Commencing smooth kNN distance calibration using 1 thread
## 17:43:55 Initializing from normalized Laplacian + noise
## 17:43:55 Commencing optimization for 200 epochs, with 624090 positive edges
## 17:44:04 Optimization finished

# Build the nearest-neighbour and SNN graphs from the same 30 "mnn" dimensions
# that were used for the UMAP.
SO <- FindNeighbors(SO, reduction = "mnn", dims = 1:30)

## Computing nearest neighbor graph
## Computing SNN

# Cluster the cells with default arguments; the log below reports the Louvain
# algorithm and 15 communities for this run.
SO <- FindClusters(SO)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 13999
## Number of edges: 524947
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8760
## Number of communities: 15
## Elapsed time: 2 seconds

# Draw three panels in one row (ncol = 3), grouping cells by `stim`, by `ident`
# and by `seurat_annotations`.
# NOTE(review): `ident` is presumably the cluster assignment from FindClusters,
# and the embedding shown is presumably the UMAP computed above; confirm.
DimPlot(SO, group.by = c("stim", "ident", "seurat_annotations"), ncol = 3)

Seurat Batch effect

Ehsan Razmara Monash Uni

04/05/2022