knitr::opts_chunk$set(echo = TRUE)

R Markdown documentation for scRNA data analysis (with SCTransform)

#setwd ("C:/Users/rsrivast/Desktop/GSE122960/Dec-Practice/Dec16")
#install.packages("Seurat")
#install.packages("hdf5r")
library(Seurat)
library(dplyr)
library(Matrix)
library(ggplot2)
library(hdf5r)
library(celldex)
library(SingleR)
library(future)
library(sctransform)
plan("multiprocess", workers = 4)
options(future.globals.maxSize = 12000 * 1024^2)

Practice - From data upload to downstream analysis

## load filtered scRNA (h5 format) sample
D1<- Read10X_h5(file = "C:/Users/rsrivast/Desktop/GSE122960/Filtered_Data/GSM3489182_Donor_01_filtered_gene_bc_matrices_h5.h5")
D4<- Read10X_h5(file = "C:/Users/rsrivast/Desktop/GSE122960/Filtered_Data/GSM3489189_Donor_04_filtered_gene_bc_matrices_h5.h5")
D3<- Read10X_h5(file = "C:/Users/rsrivast/Desktop/GSE122960/Filtered_Data/GSM3489187_Donor_03_filtered_gene_bc_matrices_h5.h5")
D6<- Read10X_h5(file = "C:/Users/rsrivast/Desktop/GSE122960/Filtered_Data/GSM3489193_Donor_06_filtered_gene_bc_matrices_h5.h5")

Create Suerat object

D1lungs = CreateSeuratObject(D1, "Old_63y")
D4lungs = CreateSeuratObject(D4, "Old_57y")
D3lungs = CreateSeuratObject(D3, "Young_29y")
D6lungs = CreateSeuratObject(D6, "Young_22y")

#Calculate mitochondrial percentage in each cell

Loop up

D1lungs[["percent.mt"]] <- PercentageFeatureSet(D1lungs, pattern = "^MT-")
D4lungs[["percent.mt"]] <- PercentageFeatureSet(D4lungs, pattern = "^MT-")
D3lungs[["percent.mt"]] <- PercentageFeatureSet(D3lungs, pattern = "^MT-")
D6lungs[["percent.mt"]] <- PercentageFeatureSet(D6lungs, pattern = "^MT-")

#lungs.list=list(D1lungs, D4lungs, D3lungs, D6lungs)

#for (i in 1:length(lungs.list)) {                                      
#                  lungs.list[[i]] <- PercentageFeatureSet(lungs.list[[i]], 
#                                     pattern = "^MT-", col.name = "percent.mt")
#                                 }
#lungs.list
#head(D1lungs@meta.data)

Apply sctransform normalization

head(D1lungs@meta.data)

##                    orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACCTGAGCGTTTAC-1    Old_63y       7743         2034   4.649361
## AAACCTGAGTCGCCGT-1    Old_63y      22427         4158   5.627146
## AAACCTGGTAGGACAC-1    Old_63y       8346         2204   5.919003
## AAACCTGGTGCCTTGG-1    Old_63y       8669         2298   7.013496
## AAACCTGGTTCAGCGC-1    Old_63y       3477         1300   7.794075
## AAACCTGTCCGTAGTA-1    Old_63y      11400         2815   6.192982

lungs.list=list(D1lungs, D4lungs, D3lungs, D6lungs)

for (i in 1:length(lungs.list)) {
    lungs.list[[i]] <- SCTransform(lungs.list[[i]], 
                        vars.to.regress = "percent.mt", 
                        verbose = FALSE)
                                }

This single command replaces NormalizeData, ScaleData, and FindVariableFeatures.

Transformed data will be available in the SCT assay, which is set as the default after running sctransform

During normalization, we can also remove confounding sources of variation, for example, mitochondrial mapping percentage

Notes from Seurat webpage

https://satijalab.org/seurat/v3.2/sctransform_vignette.html

To check the distribution of mito-content per sample

# Generate violin plots

VD.list=list()
for (i in 1:length(lungs.list)) {
    VD.list[[i]] <- VlnPlot(lungs.list[[i]], features = c("nFeature_SCT",       
                    "nCount_SCT", "percent.mt"), ncol = 3, pt.size=0)
                                }
VD.list

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# PLOT SCATTER

FSD.list=list()
for (i in 1:length(lungs.list)) {
    FSD.list[[i]] <- FeatureScatter(lungs.list[[i]], feature1 = 'nFeature_SCT',                      feature2 = 'percent.mt')
                                }
CombinePlots(plots=FSD.list)

head(D1lungs@meta.data)

##                    orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACCTGAGCGTTTAC-1    Old_63y       7743         2034   4.649361
## AAACCTGAGTCGCCGT-1    Old_63y      22427         4158   5.627146
## AAACCTGGTAGGACAC-1    Old_63y       8346         2204   5.919003
## AAACCTGGTGCCTTGG-1    Old_63y       8669         2298   7.013496
## AAACCTGGTTCAGCGC-1    Old_63y       3477         1300   7.794075
## AAACCTGTCCGTAGTA-1    Old_63y      11400         2815   6.192982

Extract a subset of the data [feature selection]

This step is based on the distribution and user defined cutoff for each sample

#SSD1 <- subset(sctD1, subset = nFeature_SCT > 200 & nFeature_SCT < 5000 & percent.mt < 15 & nCount_SCT < 25000 & nCount_SCT > 2000)
#SSD4 <- subset(sctD4, subset = nFeature_SCT > 200 & nFeature_SCT < 5000 & percent.mt < 15 & nCount_SCT < 25000 & nCount_SCT > 2000)
#SSD3 <- subset(sctD3, subset = nFeature_SCT > 200 & nFeature_SCT < 5000 & percent.mt < 15 & nCount_SCT < 25000 & nCount_SCT > 2000)
#SSD6 <- subset(sctD6, subset = nFeature_SCT > 200 & nFeature_SCT < 5000 & percent.mt < 15 & nCount_SCT < 25000 & nCount_SCT > 2000)

# this ensures that all necessary Pearson residuals have been calculated.
#DAVE TANG BLOG (https://davetang.org/muse/2017/08/01/getting-started-seurat/)

Data Integration (data combined - SCT)

lungs.features <- SelectIntegrationFeatures(object.list = lungs.list, nfeatures = 2500)

lungs.list <- PrepSCTIntegration(object.list = lungs.list, anchor.features = lungs.features, verbose = FALSE)

lungs.anchors <- FindIntegrationAnchors(object.list = lungs.list, normalization.method = "SCT", anchor.features = lungs.features, verbose = TRUE)

lungs.integrated <- IntegrateData(anchorset = lungs.anchors, normalization.method = "SCT", verbose = TRUE)

head(lungs.integrated@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1    Old_63y       7743         2034   4.649361       8965
## AAACCTGAGTCGCCGT-1_1    Old_63y      22427         4158   5.627146      10411
## AAACCTGGTAGGACAC-1_1    Old_63y       8346         2204   5.919003       9071
## AAACCTGGTGCCTTGG-1_1    Old_63y       8669         2298   7.013496       9150
## AAACCTGGTTCAGCGC-1_1    Old_63y       3477         1300   7.794075       8420
## AAACCTGTCCGTAGTA-1_1    Old_63y      11400         2815   6.192982      10026
##                      nFeature_SCT
## AAACCTGAGCGTTTAC-1_1         2032
## AAACCTGAGTCGCCGT-1_1         3263
## AAACCTGGTAGGACAC-1_1         2202
## AAACCTGGTGCCTTGG-1_1         2293
## AAACCTGGTTCAGCGC-1_1         1520
## AAACCTGTCCGTAGTA-1_1         2814

DefaultAssay(lungs.integrated)

## [1] "integrated"

Do NOT run the “ScaleData” function after integration (in SCT)

Explore the integrated object

head(lungs.integrated@active.assay)

## [1] "integrated"

VlnPlot(lungs.integrated, split.by="orig.ident", "SOD3")

DefaultAssay(lungs.integrated)="RNA"
VlnPlot(lungs.integrated, split.by="orig.ident", "SOD3")

DefaultAssay(lungs.integrated)="SCT"
VlnPlot(lungs.integrated, split.by="orig.ident", "SOD3")

VlnPlot(lungs.integrated, split.by="orig.ident", "SOD3", pt.size = 0)

Clustering (after integration)

DefaultAssay(lungs.integrated) <- "integrated"
#object <- ScaleData(object=My.integrated, verbose = TRUE) ## DONT SCALE IT

lungs.integrated <- RunPCA(object = lungs.integrated, verbose = FALSE) 
#no PCs defined.

lungs.integrated

## An object of class Seurat 
## 55657 features across 20163 samples within 3 assays 
## Active assay: integrated (2500 features, 2500 variable features)
##  2 other assays present: RNA, SCT
##  1 dimensional reduction calculated: pca

head(lungs.integrated@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1    Old_63y       7743         2034   4.649361       8965
## AAACCTGAGTCGCCGT-1_1    Old_63y      22427         4158   5.627146      10411
## AAACCTGGTAGGACAC-1_1    Old_63y       8346         2204   5.919003       9071
## AAACCTGGTGCCTTGG-1_1    Old_63y       8669         2298   7.013496       9150
## AAACCTGGTTCAGCGC-1_1    Old_63y       3477         1300   7.794075       8420
## AAACCTGTCCGTAGTA-1_1    Old_63y      11400         2815   6.192982      10026
##                      nFeature_SCT
## AAACCTGAGCGTTTAC-1_1         2032
## AAACCTGAGTCGCCGT-1_1         3263
## AAACCTGGTAGGACAC-1_1         2202
## AAACCTGGTGCCTTGG-1_1         2293
## AAACCTGGTTCAGCGC-1_1         1520
## AAACCTGTCCGTAGTA-1_1         2814

ElbowPlot(lungs.integrated, ndims = 50)

ElbowPlot(lungs.integrated, ndims = 10)

## If need to check the level of significance for each clusters, run below 3 codes
#object_JS <- JackStraw(lungs.integrated, num.replicate = 100, dims = 50)
#object_JSS <- ScoreJackStraw(object = object_JS, dims = 1:50)
#JackStrawPlot(object = object_JSS, dims = 1:50)

## Explore the top PCs
DimHeatmap(lungs.integrated, dims = 1, cells = 500, balanced=TRUE)

DimHeatmap(lungs.integrated, dims = 2, cells = 500, balanced=TRUE)

DimHeatmap(lungs.integrated, dims = c(1,3), cells = 500, balanced=TRUE)

DimHeatmap(lungs.integrated, dims = 1:3, cells = 500, balanced=TRUE)

Explore the clusters (Find neighbor, Run tSNE, Find clusters)

DefaultAssay(lungs.integrated)

## [1] "integrated"

lungs.integrated = RunUMAP(lungs.integrated, dims = 1:30)

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

## 04:45:39 UMAP embedding parameters a = 0.9922 b = 1.112

## 04:45:39 Read 20163 rows and found 30 numeric columns

## 04:45:39 Using Annoy for neighbor search, n_neighbors = 30

## 04:45:39 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 04:45:42 Writing NN index file to temp file C:\Users\rsrivast\AppData\Local\Temp\1\Rtmpw3rxOC\file4434d861185
## 04:45:42 Searching Annoy index using 4 threads, search_k = 3000
## 04:45:45 Annoy recall = 100%
## 04:45:45 Commencing smooth kNN distance calibration using 4 threads
## 04:45:47 Initializing from normalized Laplacian + noise
## 04:45:48 Commencing optimization for 200 epochs, with 886830 positive edges
## 04:46:10 Optimization finished

DimPlot(lungs.integrated)

DimPlot(lungs.integrated, split.by="orig.ident")

FeaturePlot(lungs.integrated, "SOD3",split.by="orig.ident", cols=c("grey","red","black"))

#RUN tSNE
## Find neighbor using nearest neighbor algorithm & tSNE - "Integrated"
lungs.integrated <- FindNeighbors(lungs.integrated, dims = 1:30) # dim=1:10

## Computing nearest neighbor graph
## Computing SNN

lungs.integrated = RunTSNE(lungs.integrated, dims = 1:30)
DimPlot(lungs.integrated, reduction = "tsne")

DimPlot(lungs.integrated, split.by="orig.ident",reduction = "tsne")

FeaturePlot(lungs.integrated, "SOD3",split.by="orig.ident", cols=c("grey","red","black"),reduction = "tsne")

head(lungs.integrated@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1    Old_63y       7743         2034   4.649361       8965
## AAACCTGAGTCGCCGT-1_1    Old_63y      22427         4158   5.627146      10411
## AAACCTGGTAGGACAC-1_1    Old_63y       8346         2204   5.919003       9071
## AAACCTGGTGCCTTGG-1_1    Old_63y       8669         2298   7.013496       9150
## AAACCTGGTTCAGCGC-1_1    Old_63y       3477         1300   7.794075       8420
## AAACCTGTCCGTAGTA-1_1    Old_63y      11400         2815   6.192982      10026
##                      nFeature_SCT
## AAACCTGAGCGTTTAC-1_1         2032
## AAACCTGAGTCGCCGT-1_1         3263
## AAACCTGGTAGGACAC-1_1         2202
## AAACCTGGTGCCTTGG-1_1         2293
## AAACCTGGTTCAGCGC-1_1         1520
## AAACCTGTCCGTAGTA-1_1         2814

head(lungs.integrated@active.ident)

## AAACCTGAGCGTTTAC-1_1 AAACCTGAGTCGCCGT-1_1 AAACCTGGTAGGACAC-1_1 
##              Old_63y              Old_63y              Old_63y 
## AAACCTGGTGCCTTGG-1_1 AAACCTGGTTCAGCGC-1_1 AAACCTGTCCGTAGTA-1_1 
##              Old_63y              Old_63y              Old_63y 
## Levels: Old_57y Old_63y Young_22y Young_29y

# find clusters (assay:I = integrated)
objectI10 <- FindClusters(lungs.integrated, resolution = 0.10)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20163
## Number of edges: 748466
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9620
## Number of communities: 10
## Elapsed time: 3 seconds

## Warning: UNRELIABLE VALUE: Future ('future_lapply-1') unexpectedly generated
## random numbers without specifying argument 'seed'. There is a risk that those
## random numbers are not statistically sound and the overall results might be
## invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-
## safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this
## check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore".

AU=DimPlot(objectI10)
AP=DimPlot(objectI10,reduction = "pca")
AT=DimPlot(objectI10,reduction = "tsne")
CombinePlots(list(AU,AP,AT))

## Warning: CombinePlots is being deprecated. Plots should now be combined using
## the patchwork system.

objectI20 <- FindClusters(lungs.integrated, resolution = 0.20)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20163
## Number of edges: 748466
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9343
## Number of communities: 14
## Elapsed time: 3 seconds

## Warning: UNRELIABLE VALUE: Future ('future_lapply-1') unexpectedly generated
## random numbers without specifying argument 'seed'. There is a risk that those
## random numbers are not statistically sound and the overall results might be
## invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-
## safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this
## check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore".

AU=DimPlot(objectI20)
AP=DimPlot(objectI20,reduction = "pca")
AT=DimPlot(objectI20,reduction = "tsne")
CombinePlots(list(AU,AP,AT))

## Warning: CombinePlots is being deprecated. Plots should now be combined using
## the patchwork system.

objectI30 <- FindClusters(lungs.integrated, resolution = 0.30)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20163
## Number of edges: 748466
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9161
## Number of communities: 19
## Elapsed time: 3 seconds

## Warning: UNRELIABLE VALUE: Future ('future_lapply-1') unexpectedly generated
## random numbers without specifying argument 'seed'. There is a risk that those
## random numbers are not statistically sound and the overall results might be
## invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-
## safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this
## check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore".

AU=DimPlot(objectI30)
AP=DimPlot(objectI30,reduction = "pca")
AT=DimPlot(objectI30,reduction = "tsne")
CombinePlots(list(AU,AP,AT))

## Warning: CombinePlots is being deprecated. Plots should now be combined using
## the patchwork system.

objectI50 <- FindClusters(lungs.integrated, resolution = 0.50)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20163
## Number of edges: 748466
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8943
## Number of communities: 25
## Elapsed time: 3 seconds

## Warning: UNRELIABLE VALUE: Future ('future_lapply-1') unexpectedly generated
## random numbers without specifying argument 'seed'. There is a risk that those
## random numbers are not statistically sound and the overall results might be
## invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-
## safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this
## check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore".

AU=DimPlot(objectI50)
AP=DimPlot(objectI50,reduction = "pca")
AT=DimPlot(objectI50,reduction = "tsne")
CombinePlots(list(AU,AP,AT))

## Warning: CombinePlots is being deprecated. Plots should now be combined using
## the patchwork system.

objectI75 <- FindClusters(lungs.integrated, resolution = 0.75)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 20163
## Number of edges: 748466
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8750
## Number of communities: 26
## Elapsed time: 3 seconds

## Warning: UNRELIABLE VALUE: Future ('future_lapply-1') unexpectedly generated
## random numbers without specifying argument 'seed'. There is a risk that those
## random numbers are not statistically sound and the overall results might be
## invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-
## safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this
## check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore".

AU=DimPlot(objectI75)
AP=DimPlot(objectI75,reduction = "pca")
AT=DimPlot(objectI75,reduction = "tsne")
CombinePlots(list(AU,AP,AT))

## Warning: CombinePlots is being deprecated. Plots should now be combined using
## the patchwork system.

#plots <- DimPlot(objectI75, combine = F, group.by="seurat_clusters")
#plots <- lapply(X = plots, FUN = function(x) x + theme(legend.position = "top") + guides(color = guide_legend(nrow = 4, byrow = TRUE, override.aes = list(size = 4))))
#CombinePlots(plots)



## Find average exression of integrated object
I = AverageExpression(objectI30, return.seurat = FALSE, verbose = TRUE)

## Finished averaging RNA for cluster 0
## Finished averaging RNA for cluster 1
## Finished averaging RNA for cluster 2
## Finished averaging RNA for cluster 3
## Finished averaging RNA for cluster 4
## Finished averaging RNA for cluster 5
## Finished averaging RNA for cluster 6
## Finished averaging RNA for cluster 7
## Finished averaging RNA for cluster 8
## Finished averaging RNA for cluster 9
## Finished averaging RNA for cluster 10
## Finished averaging RNA for cluster 11
## Finished averaging RNA for cluster 12
## Finished averaging RNA for cluster 13
## Finished averaging RNA for cluster 14
## Finished averaging RNA for cluster 15
## Finished averaging RNA for cluster 16
## Finished averaging RNA for cluster 17
## Finished averaging RNA for cluster 18
## Finished averaging SCT for cluster 0
## Finished averaging SCT for cluster 1
## Finished averaging SCT for cluster 2
## Finished averaging SCT for cluster 3
## Finished averaging SCT for cluster 4
## Finished averaging SCT for cluster 5
## Finished averaging SCT for cluster 6
## Finished averaging SCT for cluster 7
## Finished averaging SCT for cluster 8
## Finished averaging SCT for cluster 9
## Finished averaging SCT for cluster 10
## Finished averaging SCT for cluster 11
## Finished averaging SCT for cluster 12
## Finished averaging SCT for cluster 13
## Finished averaging SCT for cluster 14
## Finished averaging SCT for cluster 15
## Finished averaging SCT for cluster 16
## Finished averaging SCT for cluster 17
## Finished averaging SCT for cluster 18
## Finished averaging integrated for cluster 0
## Finished averaging integrated for cluster 1
## Finished averaging integrated for cluster 2
## Finished averaging integrated for cluster 3
## Finished averaging integrated for cluster 4
## Finished averaging integrated for cluster 5
## Finished averaging integrated for cluster 6
## Finished averaging integrated for cluster 7
## Finished averaging integrated for cluster 8
## Finished averaging integrated for cluster 9
## Finished averaging integrated for cluster 10
## Finished averaging integrated for cluster 11
## Finished averaging integrated for cluster 12
## Finished averaging integrated for cluster 13
## Finished averaging integrated for cluster 14
## Finished averaging integrated for cluster 15
## Finished averaging integrated for cluster 16
## Finished averaging integrated for cluster 17
## Finished averaging integrated for cluster 18

head (I$integrated)

##                   0           1          2           3            4           5
## SFTPC    -0.7577971  7520.93451 4441.38065   8314.7081  7030.527554  -0.1145684
## SCGB3A2  20.8001658  1097.62823 2743.64639 171972.4259 31827.798650  33.4123979
## SFTPA1   -0.9924176 11414.10460  230.78176   7007.2059  7686.698931  -0.5249207
## SFTPA2   -0.9890068  9577.65346   62.63899   9076.0280  6265.392279  -0.8457792
## SFTPB    -0.9976907  6765.54315 5315.30905   6515.0541  5885.278670  -0.2626763
## SPARCL1 233.5700241    68.57914   50.12627    192.4844     2.476644 549.8199419
##                   6            7           8            9          10
## SFTPC    -0.9987619 9824.7518918  33.7171912    746.77442    514.2621
## SCGB3A2  -0.8932550 6114.4667401  11.9692361 138758.81952    558.4839
## SFTPA1   -0.9950266 2425.3277670   8.1081240     59.35526    341.5823
## SFTPA2   -0.9918993 3476.4300963  -0.9027219    180.27893    167.6429
## SFTPB    -0.9990280 1561.1699410  -0.8205104   2548.91917    338.2328
## SPARCL1 307.2645181    0.6591703 384.5434449   5452.00806 683898.1073
##                 11          12       13          14         15           16
## SFTPC   2470.61403    69.40604 6947.514  35.1313896 489.339273 5.768162e+00
## SCGB3A2 5069.28376 41567.94100 6202.421  -0.8918727  10.189210 5.080986e+02
## SFTPA1  3032.74207    26.56835 3297.218   0.5093533 272.844071 1.118693e+01
## SFTPA2  1941.81013    20.10803 3128.639   3.1897104 383.336544 1.159158e+01
## SFTPB   4587.76935   160.54946 1145.892  -0.9674545 666.250234 6.463162e+00
## SPARCL1   19.03299  4212.82989 4211.095 261.8149211   0.455411 3.200536e+05
##                   17           18
## SFTPC    -0.99930252 1.183632e+03
## SCGB3A2 416.10183968 1.158033e+04
## SFTPA1   -0.99602740 2.258294e-01
## SFTPA2   -0.99354773 1.453671e+01
## SFTPB    -0.99564600 2.503477e+02
## SPARCL1  -0.06841855 1.127384e+04

head (I$RNA)

##                         0           1           2           3           4
## RP11-34P13.3  0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## FAM138A       0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## OR4F5         0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## RP11-34P13.7  0.003971184 0.003953378 0.002627342 0.004737909 0.004028797
## RP11-34P13.8  0.000000000 0.000359398 0.000000000 0.000000000 0.000000000
## RP11-34P13.14 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
##                         5           6           7 8           9 10 11
## RP11-34P13.3  0.000000000 0.000000000 0.000000000 0 0.000000000  0  0
## FAM138A       0.000000000 0.000000000 0.000000000 0 0.000000000  0  0
## OR4F5         0.000000000 0.000000000 0.000000000 0 0.000000000  0  0
## RP11-34P13.7  0.004145433 0.002098024 0.003158606 0 0.005352903  0  0
## RP11-34P13.8  0.002072716 0.000000000 0.000000000 0 0.000000000  0  0
## RP11-34P13.14 0.000000000 0.000000000 0.000000000 0 0.000000000  0  0
##                        12 13          14 15 16 17 18
## RP11-34P13.3  0.000000000  0 0.000000000  0  0  0  0
## FAM138A       0.000000000  0 0.000000000  0  0  0  0
## OR4F5         0.000000000  0 0.000000000  0  0  0  0
## RP11-34P13.7  0.007775031  0 0.008766744  0  0  0  0
## RP11-34P13.8  0.000000000  0 0.000000000  0  0  0  0
## RP11-34P13.14 0.000000000  0 0.000000000  0  0  0  0

head(I$SCT)

##                          0           1           2           3           4
## RP11-34P13.7  0.0021666907 0.002300774 0.001529052 0.002757353 0.002344666
## FO538757.2    0.1821464683 0.172976365 0.193679918 0.211397059 0.196951934
## AP006222.2    0.1385237614 0.037230705 0.044342508 0.036764706 0.026963658
## RP4-669L17.10 0.0028889210 0.003764903 0.002548420 0.003676471 0.001172333
## RP5-857K21.4  0.0002888921 0.001673290 0.000509684 0.002757353 0.003516999
## RP11-206L10.9 0.0169001878 0.033884125 0.022935780 0.027573529 0.031652989
##                         5           6           7          8           9
## RP11-34P13.7  0.002412545 0.001221001 0.001838235 0.00000000 0.003115265
## FO538757.2    0.213510253 0.191697192 0.327205882 0.13232104 0.161993769
## AP006222.2    0.034981906 0.152625153 0.066176471 0.01084599 0.015576324
## RP4-669L17.10 0.002412545 0.001221001 0.003676471 0.00000000 0.000000000
## RP5-857K21.4  0.001206273 0.000000000 0.000000000 0.00000000 0.000000000
## RP11-206L10.9 0.015681544 0.013431013 0.031250000 0.01952278 0.009345794
##                       10         11         12         13          14
## RP11-34P13.7  0.00000000 0.00000000 0.00000000 0.00000000 0.005102041
## FO538757.2    0.16014235 0.16605166 0.19457014 0.25242718 0.224489796
## AP006222.2    0.05338078 0.03690037 0.01809955 0.12621359 0.173469388
## RP4-669L17.10 0.00000000 0.00000000 0.01357466 0.01456311 0.000000000
## RP5-857K21.4  0.00000000 0.00000000 0.00000000 0.00000000 0.000000000
## RP11-206L10.9 0.00000000 0.02214022 0.03619910 0.05825243 0.020408163
##                        15         16         17         18
## RP11-34P13.7  0.000000000 0.00000000 0.00000000 0.00000000
## FO538757.2    0.271794872 0.25581395 0.29230769 0.22950820
## AP006222.2    0.194871795 0.03488372 0.15384615 0.01639344
## RP4-669L17.10 0.005128205 0.02325581 0.00000000 0.00000000
## RP5-857K21.4  0.000000000 0.00000000 0.00000000 0.00000000
## RP11-206L10.9 0.035897436 0.03488372 0.03076923 0.01639344

Idents(objectI30)="orig.ident"
DefaultAssay(objectI30)="SCT"
DoHeatmap(objectI30,features=c("SOD3","LRRK2"))

DefaultAssay(objectI30)="integrated"
DoHeatmap(objectI30,features=c("SOD3","LRRK2"))

Idents(objectI30)="seurat_clusters"

TRY

#saveRDS(object1,"Object1_10X.rds")

#Trick for assigning additional column. For instance - condition

DefaultAssay(objectI30)="SCT"
head(objectI30@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1    Old_63y       7743         2034   4.649361       8965
## AAACCTGAGTCGCCGT-1_1    Old_63y      22427         4158   5.627146      10411
## AAACCTGGTAGGACAC-1_1    Old_63y       8346         2204   5.919003       9071
## AAACCTGGTGCCTTGG-1_1    Old_63y       8669         2298   7.013496       9150
## AAACCTGGTTCAGCGC-1_1    Old_63y       3477         1300   7.794075       8420
## AAACCTGTCCGTAGTA-1_1    Old_63y      11400         2815   6.192982      10026
##                      nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1         2032                      1               1
## AAACCTGAGTCGCCGT-1_1         3263                      4               4
## AAACCTGGTAGGACAC-1_1         2202                      2               2
## AAACCTGGTGCCTTGG-1_1         2293                      1               1
## AAACCTGGTTCAGCGC-1_1         1520                      9               9
## AAACCTGTCCGTAGTA-1_1         2814                      3               3

objectI30$condition = 1
temp = objectI30@meta.data
x = dim(temp)[1]
for (i in 1:x)
 {
     if(temp[i,1] == 'Old_57y' || temp[i,1] == 'Old_63y')
     {
         temp[i,9] = "Old"
     }
     
     else
     {
         temp[i,9] = "Young"
     }
 }

objectI30@meta.data=temp
head(objectI30@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1    Old_63y       7743         2034   4.649361       8965
## AAACCTGAGTCGCCGT-1_1    Old_63y      22427         4158   5.627146      10411
## AAACCTGGTAGGACAC-1_1    Old_63y       8346         2204   5.919003       9071
## AAACCTGGTGCCTTGG-1_1    Old_63y       8669         2298   7.013496       9150
## AAACCTGGTTCAGCGC-1_1    Old_63y       3477         1300   7.794075       8420
## AAACCTGTCCGTAGTA-1_1    Old_63y      11400         2815   6.192982      10026
##                      nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1         2032                      1               1
## AAACCTGAGTCGCCGT-1_1         3263                      4               4
## AAACCTGGTAGGACAC-1_1         2202                      2               2
## AAACCTGGTGCCTTGG-1_1         2293                      1               1
## AAACCTGGTTCAGCGC-1_1         1520                      9               9
## AAACCTGTCCGTAGTA-1_1         2814                      3               3
##                      condition
## AAACCTGAGCGTTTAC-1_1       Old
## AAACCTGAGTCGCCGT-1_1       Old
## AAACCTGGTAGGACAC-1_1       Old
## AAACCTGGTGCCTTGG-1_1       Old
## AAACCTGGTTCAGCGC-1_1       Old
## AAACCTGTCCGTAGTA-1_1       Old

VlnPlot(objectI30, "SOD3", split.by = "orig.ident")

VlnPlot(objectI30, "SOD3", split.by = "condition")

#plots <- DimPlot(lungs.integrated, "SOD3", group.by = c("orig.ident","condition"), combine = F)
#plots <- lapply(X = plots, FUN = function(x) x + theme(legend.position = "top") + guides(color = guide_legend(nrow = 4, byrow = TRUE, override.aes = list(size = 4))))
#CombinePlots(plots)

Identify all the markers in identified clusters [objectS20]

DefaultAssay(objectI30)="SCT"
objectI30

## An object of class Seurat 
## 55657 features across 20163 samples within 3 assays 
## Active assay: SCT (19463 features, 0 variable features)
##  2 other assays present: RNA, integrated
##  3 dimensional reductions calculated: pca, umap, tsne

objectI30_markers=FindAllMarkers(objectI30,only.pos = T,logfc.threshold = 0.25, min.pct = 0.10)

## Calculating cluster 0

## Calculating cluster 1

## Calculating cluster 2

## Calculating cluster 3

## Calculating cluster 4

## Calculating cluster 5

## Calculating cluster 6

## Calculating cluster 7

## Calculating cluster 8

## Calculating cluster 9

## Calculating cluster 10

## Calculating cluster 11

## Calculating cluster 12

## Calculating cluster 13

## Calculating cluster 14

## Calculating cluster 15

## Calculating cluster 16

## Calculating cluster 17

## Calculating cluster 18

tail(objectI30_markers)

##               p_val avg_logFC pct.1 pct.2 p_val_adj cluster   gene
## CLK1    0.006228930 0.2517279 0.393 0.281         1      18   CLK1
## CRBN    0.006843296 0.2553573 0.295 0.196         1      18   CRBN
## PLP22   0.006845073 0.2839817 0.443 0.323         1      18   PLP2
## NOP561  0.008829589 0.2536799 0.295 0.197         1      18  NOP56
## AHNAK5  0.008845046 0.3642356 0.508 0.448         1      18  AHNAK
## HNRNPK1 0.009698877 0.2818846 0.836 0.691         1      18 HNRNPK

FeaturePlot(objectI30, features = c("C1QB","C1QA","FABP4","SOD3"), cols=c("grey","red", "black"))

FeaturePlot(objectI30, features = c("C1QB","C1QA","FABP4","SOD3"), cols=c("grey","red", "black"),reduction = "tsne")

#write.table(objectI30_markers,"All_marker_Clusters.txt") # Explore the table

## Random try
#DefaultAssay(objectI30)="SCT"
FeaturePlot(objectI30, features = "SOD3",cols=c("grey","red", "black"))

FeaturePlot(objectI30, features = "SOD3",cols=c("grey","red", "black"), split.by="orig.ident")

FeaturePlot(objectI30, features = c("SOD3", "TMPRSS2"),cols=c("grey","red", "black"))

FeaturePlot(objectI30, features = c("SOD3", "TMPRSS2"), split.by = "orig.ident", cols=c("grey","red", "black"))

VlnPlot(objectI30, features = c("SOD3", "TMPRSS2"), split.by = "orig.ident")

Idents(objectI30)="condition"
FeaturePlot(objectI30, features = c("SOD3", "TMPRSS2"),cols=c("grey","red", "black"))

FeaturePlot(objectI30, features = c("SOD3", "TMPRSS2"), split.by = "condition", cols=c("grey","red", "black"))

VlnPlot(objectI30, features = c("SOD3", "TMPRSS2"), split.by = "orig.ident")

Check the status of top 5 marker genes per cluster

## Note: scaling up is an important step, if not done previously, can be implemented for selected genes
Idents(objectI30)="seurat_clusters"
top5 <- objectI30_markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
head(top5)

## # A tibble: 6 x 7
## # Groups:   cluster [2]
##   p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene  
##   <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr> 
## 1     0      1.86 0.996 0.479         0 0       C1QB  
## 2     0      1.77 0.993 0.438         0 0       C1QA  
## 3     0      1.74 0.999 0.783         0 0       APOC1 
## 4     0      1.62 0.939 0.355         0 0       FABP4 
## 5     0      1.59 0.982 0.389         0 0       CD52  
## 6     0      1.49 1     0.801         0 1       SFTPA1

DefaultAssay(objectI30)

## [1] "SCT"

#T5 <- ScaleData(object1, features = top5$gene)

DoHeatmap(objectI30, features = top5$gene) + NoLegend()

Idents(objectI30)="condition"
DoHeatmap(objectI30, features = top5$gene) + NoLegend()

Idents(objectI30)="orig.ident"
DoHeatmap(objectI30, features = top5$gene) + NoLegend()

Data Integration (data combined)

Assign cell types to the clusters

table(objectI30@meta.data$seurat_clusters,objectI30@meta.data$condition)

##     
##       Old Young
##   0  3454  3469
##   1  2709  2072
##   2  1310   652
##   3   801   287
##   4   480   373
##   5   218   611
##   6   375   444
##   7   206   338
##   8   135   326
##   9   161   160
##   10  138   143
##   11  147   124
##   12  126    95
##   13  117    89
##   14   87   109
##   15   63   132
##   16   28    58
##   17   32    33
##   18   41    20

head(objectI30@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1    Old_63y       7743         2034   4.649361       8965
## AAACCTGAGTCGCCGT-1_1    Old_63y      22427         4158   5.627146      10411
## AAACCTGGTAGGACAC-1_1    Old_63y       8346         2204   5.919003       9071
## AAACCTGGTGCCTTGG-1_1    Old_63y       8669         2298   7.013496       9150
## AAACCTGGTTCAGCGC-1_1    Old_63y       3477         1300   7.794075       8420
## AAACCTGTCCGTAGTA-1_1    Old_63y      11400         2815   6.192982      10026
##                      nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1         2032                      1               1
## AAACCTGAGTCGCCGT-1_1         3263                      4               4
## AAACCTGGTAGGACAC-1_1         2202                      2               2
## AAACCTGGTGCCTTGG-1_1         2293                      1               1
## AAACCTGGTTCAGCGC-1_1         1520                      9               9
## AAACCTGTCCGTAGTA-1_1         2814                      3               3
##                      condition
## AAACCTGAGCGTTTAC-1_1       Old
## AAACCTGAGTCGCCGT-1_1       Old
## AAACCTGGTAGGACAC-1_1       Old
## AAACCTGGTGCCTTGG-1_1       Old
## AAACCTGGTTCAGCGC-1_1       Old
## AAACCTGTCCGTAGTA-1_1       Old

objectI30$Cond_Clus = 1
objectI30$Cond_Clus = paste(objectI30$condition, objectI30$seurat_clusters, sep = "_")
#table(objectI30@meta.data$seurat_clusters,objectI30@meta.data$condition)

Idents(objectI30)="seurat_clusters"
hpca <- HumanPrimaryCellAtlasData()
singler_2=SingleR(de.method ="classic", test=objectI30@assays$RNA@data, clusters = objectI30@meta.data$seurat_clusters, genes = "de", quantile = 0.8, tune.thresh = 0.05, fine.tune = TRUE, sd.thresh = 1, ref=hpca@assays@data$logcounts,labels=hpca$label.main)
label_cells=singler_2$first.labels
label_cells

##  [1] "Macrophage"        "Epithelial_cells"  "Epithelial_cells" 
##  [4] "Epithelial_cells"  "Epithelial_cells"  "Monocyte"         
##  [7] "Macrophage"        "Epithelial_cells"  "NK_cell"          
## [10] "Epithelial_cells"  "Endothelial_cells" "Epithelial_cells" 
## [13] "Epithelial_cells"  "Macrophage"        "Macrophage"       
## [16] "Macrophage"        "Fibroblasts"       "Endothelial_cells"
## [19] "Macrophage"

Idents(objectI30)=plyr::mapvalues(Idents(objectI30),from = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18), to = label_cells)
DimPlot(objectI30,split.by="condition", reduction = "tsne")

FeaturePlot(objectI30,"SOD3", label=T, cols=c("grey","orange","red","black"),reduction = "tsne")

FeaturePlot(objectI30,"SOD3", split.by="condition", label=T, cols = c("grey","orange","red","black"),reduction = "tsne")

VlnPlot(objectI30, split.by="condition","SOD3")

DIFFERENTIAL EXPRESSION ANALYSIS: young vs old

Idents(objectI30)="condition"
DefaultAssay(objectI30)="SCT"
head(objectI30@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1    Old_63y       7743         2034   4.649361       8965
## AAACCTGAGTCGCCGT-1_1    Old_63y      22427         4158   5.627146      10411
## AAACCTGGTAGGACAC-1_1    Old_63y       8346         2204   5.919003       9071
## AAACCTGGTGCCTTGG-1_1    Old_63y       8669         2298   7.013496       9150
## AAACCTGGTTCAGCGC-1_1    Old_63y       3477         1300   7.794075       8420
## AAACCTGTCCGTAGTA-1_1    Old_63y      11400         2815   6.192982      10026
##                      nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1         2032                      1               1
## AAACCTGAGTCGCCGT-1_1         3263                      4               4
## AAACCTGGTAGGACAC-1_1         2202                      2               2
## AAACCTGGTGCCTTGG-1_1         2293                      1               1
## AAACCTGGTTCAGCGC-1_1         1520                      9               9
## AAACCTGTCCGTAGTA-1_1         2814                      3               3
##                      condition Cond_Clus
## AAACCTGAGCGTTTAC-1_1       Old     Old_1
## AAACCTGAGTCGCCGT-1_1       Old     Old_4
## AAACCTGGTAGGACAC-1_1       Old     Old_2
## AAACCTGGTGCCTTGG-1_1       Old     Old_1
## AAACCTGGTTCAGCGC-1_1       Old     Old_9
## AAACCTGTCCGTAGTA-1_1       Old     Old_3

objectI30_DEGs <- FindMarkers(objectI30, ident.1 = "Young", ident.2 = "Old",logfc.threshold = 0.25, min.pct=0.30)
head (objectI30_DEGs,50)

##                       p_val  avg_logFC pct.1 pct.2     p_val_adj
## APOE           0.000000e+00  1.0764221 0.630 0.312  0.000000e+00
## RP11-1143G9.4  0.000000e+00  0.6137343 0.412 0.148  0.000000e+00
## SNHG25         0.000000e+00  0.6080597 0.815 0.516  0.000000e+00
## PABPC4         0.000000e+00  0.4904915 0.846 0.578  0.000000e+00
## XIST           0.000000e+00  0.4835448 0.754 0.371  0.000000e+00
## CHCHD10        0.000000e+00  0.4769061 0.536 0.230  0.000000e+00
## MT-ATP8        0.000000e+00  0.4266514 0.630 0.394  0.000000e+00
## ATP5D          0.000000e+00  0.4093635 0.629 0.431  0.000000e+00
## EIF2S2         0.000000e+00  0.3899471 0.891 0.730  0.000000e+00
## CBX3           0.000000e+00  0.3839203 0.796 0.569  0.000000e+00
## TBCA           0.000000e+00  0.3329524 0.950 0.857  0.000000e+00
## MT-ND3         0.000000e+00 -0.3256366 0.998 1.000  0.000000e+00
## FOSB           0.000000e+00 -0.3537806 0.245 0.497  0.000000e+00
## CD9            0.000000e+00 -0.3835476 0.831 0.923  0.000000e+00
## MT-CO3         0.000000e+00 -0.4184467 0.998 1.000  0.000000e+00
## SOCS3          0.000000e+00 -0.4297941 0.196 0.482  0.000000e+00
## MT-ND4         0.000000e+00 -0.4438800 1.000 1.000  0.000000e+00
## MT-CO2         0.000000e+00 -0.4475630 0.999 1.000  0.000000e+00
## RPS10          0.000000e+00 -0.4789120 0.983 0.993  0.000000e+00
## MT-ATP6        0.000000e+00 -0.4998865 0.999 1.000  0.000000e+00
## MT-CYB         0.000000e+00 -0.5529803 0.998 1.000  0.000000e+00
## RHOB           0.000000e+00 -0.5540921 0.395 0.689  0.000000e+00
## JUNB           0.000000e+00 -0.5569892 0.474 0.772  0.000000e+00
## HSP90AA1       0.000000e+00 -0.5578679 0.889 0.970  0.000000e+00
## KLF6           0.000000e+00 -0.6010049 0.742 0.930  0.000000e+00
## SFTPC          0.000000e+00 -0.6100808 0.974 0.993  0.000000e+00
## ZFP36          0.000000e+00 -0.6174962 0.554 0.866  0.000000e+00
## SCGB3A2        0.000000e+00 -0.6376392 0.227 0.472  0.000000e+00
## FOS            0.000000e+00 -0.6391686 0.730 0.955  0.000000e+00
## HSPA1B         0.000000e+00 -0.6977429 0.103 0.431  0.000000e+00
## JUN            0.000000e+00 -0.7589664 0.543 0.847  0.000000e+00
## HSPA1A         0.000000e+00 -0.8406444 0.156 0.466  0.000000e+00
## HLA-DRB5       0.000000e+00 -0.8869007 0.418 0.791  0.000000e+00
## SLPI           0.000000e+00 -0.8980185 0.733 0.821  0.000000e+00
## RPS4Y1         0.000000e+00 -1.2249332 0.001 0.501  0.000000e+00
## RPS27L        4.566876e-303  0.3585116 0.963 0.863 8.888510e-299
## DDX5          6.809032e-302 -0.2618043 0.877 0.964 1.325242e-297
## NPC2          1.363993e-290 -0.2619844 0.960 0.980 2.654740e-286
## ATP5I         4.258358e-280  0.3510069 0.938 0.861 8.288041e-276
## CXCL2         4.240879e-270 -0.4455831 0.373 0.607 8.254024e-266
## FABP5         1.371030e-269 -0.4519820 0.878 0.919 2.668436e-265
## GADD45B       2.124888e-268 -0.4879368 0.307 0.527 4.135669e-264
## NFKBIA        5.901474e-259 -0.3687697 0.706 0.849 1.148604e-254
## RPS11         2.981106e-249 -0.4309405 0.997 0.995 5.802126e-245
## HLA-B         2.743128e-248  0.3528311 0.976 0.901 5.338951e-244
## ELF3          2.153854e-239 -0.4552607 0.227 0.430 4.192045e-235
## MT-ND2        2.189784e-238 -0.3792294 1.000 1.000 4.261976e-234
## AGR2          5.233998e-234 -0.3950638 0.188 0.386 1.018693e-229
## MT-CO1        1.517712e-233 -0.6005171 1.000 1.000 2.953923e-229
## RPS20         2.215862e-229 -0.3786682 1.000 1.000 4.312733e-225

Compare specific clusters

#LATER

head(objectI30@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1    Old_63y       7743         2034   4.649361       8965
## AAACCTGAGTCGCCGT-1_1    Old_63y      22427         4158   5.627146      10411
## AAACCTGGTAGGACAC-1_1    Old_63y       8346         2204   5.919003       9071
## AAACCTGGTGCCTTGG-1_1    Old_63y       8669         2298   7.013496       9150
## AAACCTGGTTCAGCGC-1_1    Old_63y       3477         1300   7.794075       8420
## AAACCTGTCCGTAGTA-1_1    Old_63y      11400         2815   6.192982      10026
##                      nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1         2032                      1               1
## AAACCTGAGTCGCCGT-1_1         3263                      4               4
## AAACCTGGTAGGACAC-1_1         2202                      2               2
## AAACCTGGTGCCTTGG-1_1         2293                      1               1
## AAACCTGGTTCAGCGC-1_1         1520                      9               9
## AAACCTGTCCGTAGTA-1_1         2814                      3               3
##                      condition Cond_Clus
## AAACCTGAGCGTTTAC-1_1       Old     Old_1
## AAACCTGAGTCGCCGT-1_1       Old     Old_4
## AAACCTGGTAGGACAC-1_1       Old     Old_2
## AAACCTGGTGCCTTGG-1_1       Old     Old_1
## AAACCTGGTTCAGCGC-1_1       Old     Old_9
## AAACCTGTCCGTAGTA-1_1       Old     Old_3

cluster1=subset(objectI30,subset=seurat_clusters==1)
#Cluster the cells by specific pathways [Next plan]
# Ex 80 genes, Cluster the cells 

head(cluster1)

## An object of class Seurat 
## 12 features across 4781 samples within 2 assays 
## Active assay: SCT (6 features, 0 variable features)
##  1 other assay present: RNA
##  2 dimensional reductions calculated: umap, tsne

DimPlot(cluster1, split.by="orig.ident")

DimPlot(cluster1, split.by="condition")

Idents(cluster1)="condition"
head(cluster1@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1    Old_63y       7743         2034   4.649361       8965
## AAACCTGGTGCCTTGG-1_1    Old_63y       8669         2298   7.013496       9150
## AAACGGGAGCCGATTT-1_1    Old_63y       8977         2216   7.708589       9260
## AAACGGGAGGTTCCTA-1_1    Old_63y      10164         2427   4.338843       9720
## AAACGGGGTCCTCCAT-1_1    Old_63y      13847         2919   4.759154      10216
## AAAGATGCAAGGGTCA-1_1    Old_63y       7322         1651   7.456979       8974
##                      nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1         2032                      1               1
## AAACCTGGTGCCTTGG-1_1         2293                      1               1
## AAACGGGAGCCGATTT-1_1         2213                      1               1
## AAACGGGAGGTTCCTA-1_1         2426                      1               1
## AAACGGGGTCCTCCAT-1_1         2890                      1               1
## AAAGATGCAAGGGTCA-1_1         1650                      1               1
##                      condition Cond_Clus
## AAACCTGAGCGTTTAC-1_1       Old     Old_1
## AAACCTGGTGCCTTGG-1_1       Old     Old_1
## AAACGGGAGCCGATTT-1_1       Old     Old_1
## AAACGGGAGGTTCCTA-1_1       Old     Old_1
## AAACGGGGTCCTCCAT-1_1       Old     Old_1
## AAAGATGCAAGGGTCA-1_1       Old     Old_1

Y=subset(cluster1,subset=condition=="Young")
O=subset(cluster1,subset=condition=="Old")
mergeClus1=merge(x = Y, y = O) 
head(mergeClus1@active.ident)

## AAACCTGCAAGTAATG-1_3 AAACGGGTCGCTGATA-1_3 AAAGATGTCAGCACAT-1_3 
##                Young                Young                Young 
## AAAGCAAAGATGTGGC-1_3 AAAGCAAAGCCGCCTA-1_3 AAAGCAACAAAGGAAG-1_3 
##                Young                Young                Young 
## Levels: Old Young

head(mergeClus1@active.assay)

## [1] "SCT"

DefaultAssay(mergeClus1) <- "SCT"
head(mergeClus1@active.assay)

## [1] "SCT"

FAmergeClus1 <- FindVariableFeatures(object = mergeClus1, selection.method = "vst", nfeatures = 2000)
FAmergeClus1 <- ScaleData(FAmergeClus1, verbose = TRUE)
FAClus1 <- RunPCA(object = FAmergeClus1, npcs = 50, verbose = FALSE)
DimHeatmap(object = FAClus1, dims = 1, cells = 500, balanced=TRUE)

ElbowPlot(FAClus1, ndims = 50)

FAClus1 <- FindNeighbors(object = FAClus1, dims = 1:5)
FAClus1= RunTSNE(FAClus1, dims=1:5)
head (FAClus1@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGCAAGTAATG-1_3  Young_29y       1416          737   9.816384       3237
## AAACGGGTCGCTGATA-1_3  Young_29y       5103         2038   2.900255       4278
## AAAGATGTCAGCACAT-1_3  Young_29y       2980         1244   2.382550       3424
## AAAGCAAAGATGTGGC-1_3  Young_29y       3817         1537   2.279277       3806
## AAAGCAAAGCCGCCTA-1_3  Young_29y       4412         1736   3.014506       4066
## AAAGCAACAAAGGAAG-1_3  Young_29y       4240         1709   6.438679       3999
##                      nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGCAAGTAATG-1_3          850                      1               1
## AAACGGGTCGCTGATA-1_3         2029                      1               1
## AAAGATGTCAGCACAT-1_3         1242                      1               1
## AAAGCAAAGATGTGGC-1_3         1536                      1               1
## AAAGCAAAGCCGCCTA-1_3         1735                      1               1
## AAAGCAACAAAGGAAG-1_3         1705                      1               1
##                      condition Cond_Clus
## AAACCTGCAAGTAATG-1_3     Young   Young_1
## AAACGGGTCGCTGATA-1_3     Young   Young_1
## AAAGATGTCAGCACAT-1_3     Young   Young_1
## AAAGCAAAGATGTGGC-1_3     Young   Young_1
## AAAGCAAAGCCGCCTA-1_3     Young   Young_1
## AAAGCAACAAAGGAAG-1_3     Young   Young_1

Idents(cluster1)="seurat_clusters"
DimPlot(FAClus1)

DimPlot(FAClus1, split.by="orig.ident")

FeaturePlot(FAClus1,"SOD3", split.by="condition")

FeaturePlot(FAClus1,"SOD3", cols=c("grey","orange","red"))

FeaturePlot(FAClus1,"SOD3", split.by="orig.ident", cols=c("grey","orange","red"))

#TRY ONE MORE ATTEMPT WITH 
Idents(cluster1)="seurat_clusters"
head(cluster1@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1    Old_63y       7743         2034   4.649361       8965
## AAACCTGGTGCCTTGG-1_1    Old_63y       8669         2298   7.013496       9150
## AAACGGGAGCCGATTT-1_1    Old_63y       8977         2216   7.708589       9260
## AAACGGGAGGTTCCTA-1_1    Old_63y      10164         2427   4.338843       9720
## AAACGGGGTCCTCCAT-1_1    Old_63y      13847         2919   4.759154      10216
## AAAGATGCAAGGGTCA-1_1    Old_63y       7322         1651   7.456979       8974
##                      nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1         2032                      1               1
## AAACCTGGTGCCTTGG-1_1         2293                      1               1
## AAACGGGAGCCGATTT-1_1         2213                      1               1
## AAACGGGAGGTTCCTA-1_1         2426                      1               1
## AAACGGGGTCCTCCAT-1_1         2890                      1               1
## AAAGATGCAAGGGTCA-1_1         1650                      1               1
##                      condition Cond_Clus
## AAACCTGAGCGTTTAC-1_1       Old     Old_1
## AAACCTGGTGCCTTGG-1_1       Old     Old_1
## AAACGGGAGCCGATTT-1_1       Old     Old_1
## AAACGGGAGGTTCCTA-1_1       Old     Old_1
## AAACGGGGTCCTCCAT-1_1       Old     Old_1
## AAAGATGCAAGGGTCA-1_1       Old     Old_1

Y=subset(cluster1,subset=condition=="Young")
O=subset(cluster1,subset=condition=="Old")
mergeClus1=merge(x = Y, y = O) 
head(mergeClus1@active.ident)

## AAACCTGCAAGTAATG-1_3 AAACGGGTCGCTGATA-1_3 AAAGATGTCAGCACAT-1_3 
##                    1                    1                    1 
## AAAGCAAAGATGTGGC-1_3 AAAGCAAAGCCGCCTA-1_3 AAAGCAACAAAGGAAG-1_3 
##                    1                    1                    1 
## Levels: 1

head(mergeClus1@active.assay)

## [1] "SCT"

DefaultAssay(mergeClus1) <- "SCT"
head(mergeClus1@active.assay)

## [1] "SCT"

FAmergeClus1 <- FindVariableFeatures(object = mergeClus1, selection.method = "vst", nfeatures = 2000)
FAmergeClus1 <- ScaleData(FAmergeClus1, verbose = TRUE)
FAClus1 <- RunPCA(object = FAmergeClus1, npcs = 50, verbose = FALSE)
DimHeatmap(object = FAClus1, dims = 1, cells = 500, balanced=TRUE)

ElbowPlot(FAClus1, ndims = 50)

FAClus1 <- FindNeighbors(object = FAClus1, dims = 1:3)
FAClus1= RunTSNE(FAClus1, dims=1:3)
head (FAClus1@meta.data)

##                      orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGCAAGTAATG-1_3  Young_29y       1416          737   9.816384       3237
## AAACGGGTCGCTGATA-1_3  Young_29y       5103         2038   2.900255       4278
## AAAGATGTCAGCACAT-1_3  Young_29y       2980         1244   2.382550       3424
## AAAGCAAAGATGTGGC-1_3  Young_29y       3817         1537   2.279277       3806
## AAAGCAAAGCCGCCTA-1_3  Young_29y       4412         1736   3.014506       4066
## AAAGCAACAAAGGAAG-1_3  Young_29y       4240         1709   6.438679       3999
##                      nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGCAAGTAATG-1_3          850                      1               1
## AAACGGGTCGCTGATA-1_3         2029                      1               1
## AAAGATGTCAGCACAT-1_3         1242                      1               1
## AAAGCAAAGATGTGGC-1_3         1536                      1               1
## AAAGCAAAGCCGCCTA-1_3         1735                      1               1
## AAAGCAACAAAGGAAG-1_3         1705                      1               1
##                      condition Cond_Clus
## AAACCTGCAAGTAATG-1_3     Young   Young_1
## AAACGGGTCGCTGATA-1_3     Young   Young_1
## AAAGATGTCAGCACAT-1_3     Young   Young_1
## AAAGCAAAGATGTGGC-1_3     Young   Young_1
## AAAGCAAAGCCGCCTA-1_3     Young   Young_1
## AAAGCAACAAAGGAAG-1_3     Young   Young_1

Idents(cluster1)="Cond_Clus"
DimPlot(FAClus1)

DimPlot(FAClus1, split.by="Cond_Clus")

#FeaturePlot(FAClus1,"SOD3", split.by="condition")
FeaturePlot(FAClus1,"SOD3", cols=c("grey","orange","red"), label=T)

FeaturePlot(FAClus1,"SOD3", split.by="orig.ident", cols=c("grey","orange","red"))

head(FAClus1@active.assay)

## [1] "SCT"

Idents(FAClus1)="condition"
FAClus1_markers <- FindMarkers(FAClus1, ident.1 = "Young", ident.2 = "Old",logfc.threshold = 0.25)
head (FAClus1_markers)

##                p_val  avg_logFC pct.1 pct.2     p_val_adj
## SOD3    0.000000e+00  1.0009360 0.841 0.452  0.000000e+00
## KLF6   1.714084e-250 -0.8197599 0.553 0.886 3.336122e-246
## TACC2  2.196916e-248  0.5859494 0.964 0.797 4.275858e-244
## SLPI   5.671500e-248 -0.6468030 1.000 1.000 1.103844e-243
## RPS4Y1 3.180983e-232 -1.2169557 0.000 0.403 6.191148e-228
## EIF2S2 2.974092e-214  0.6183710 0.909 0.674 5.788476e-210

cluster2=subset(objectI30,subset=seurat_clusters==2)
Idents(cluster2)="condition"
DEClus2_markers <- FindMarkers(cluster2, ident.1 = "Young", ident.2 = "Old",logfc.threshold = 0.25)
head (DEClus2_markers)

##               p_val  avg_logFC pct.1 pct.2    p_val_adj
## SLPI   1.750918e-93 -0.6658101 0.979 0.997 3.407812e-89
## SOD3   1.059162e-76  0.8321142 0.738 0.441 2.061447e-72
## APOE   1.626007e-75  0.4141389 0.348 0.040 3.164697e-71
## DONSON 1.346086e-74  0.3822088 0.264 0.009 2.619887e-70
## EIF2S2 6.801560e-71  0.5761827 0.902 0.678 1.323788e-66
## RPS4Y1 2.233024e-67 -1.2145276 0.000 0.360 4.346135e-63

cluster3=subset(objectI30,subset=seurat_clusters==3)
Idents(cluster3)="condition"
DEClus3_markers <- FindMarkers(cluster3, ident.1 = "Young", ident.2 = "Old",logfc.threshold = 0.25)
head (DEClus3_markers)

##                p_val  avg_logFC pct.1 pct.2    p_val_adj
## XIST    1.977756e-74  0.7735869 0.871 0.278 3.849307e-70
## RPS4Y1  1.460157e-69 -1.4780800 0.000 0.648 2.841905e-65
## DONSON  1.421639e-58  0.4356137 0.328 0.007 2.766936e-54
## KLF6    1.096396e-56 -0.9249761 0.481 0.905 2.133915e-52
## SCGB3A2 3.864849e-56 -0.8824029 0.944 0.968 7.522156e-52
## SLPI    4.950875e-54 -0.6422324 1.000 1.000 9.635887e-50

cluster4=subset(objectI30,subset=seurat_clusters==4)
Idents(cluster4)="condition"
DEClus4_markers <- FindMarkers(cluster4, ident.1 = "Young", ident.2 = "Old",logfc.threshold = 0.25)
head (DEClus4_markers)

##                 p_val  avg_logFC pct.1 pct.2    p_val_adj
## SOD3     1.495984e-78  1.1662374 0.885 0.446 2.911633e-74
## KLF6     1.248456e-71 -1.0327124 0.641 0.960 2.429870e-67
## HLA-DRB5 1.149570e-62 -0.6888812 0.188 0.754 2.237407e-58
## TACC2    1.472720e-56  0.6329141 0.981 0.827 2.866356e-52
## DONSON   2.377614e-56  0.5526940 0.450 0.010 4.627549e-52
## SLPI     4.637310e-50 -0.6552608 1.000 1.000 9.025596e-46

Idents(cluster1)="condition"
cluster.list=list(cluster1,cluster2,cluster3,cluster4)
VCL.list=list()
for (i in 1:length(cluster.list)) {
    VCL.list[[i]] <- VlnPlot(cluster.list[[i]], features=c("SOD3","KLF6"), split.by="condition",  
                     pt.size=0)
                                  }
CombinePlots(VCL.list)

for (i in 1:length(cluster.list)) {
    VCL.list[[i]] <- FeaturePlot(cluster.list[[i]], features=c("SOD3","KLF6"), cols=c("grey","red","black"),split.by="condition")
                                  }
CombinePlots(VCL.list)

#DimPlot(Fibrotic)

#mergeClus0=merge(x = Donor, y = Fibrotic) # merging and assigning the object
#head(mergeClus0@meta.data)
#NOTE: How to make use of it?

Practice-X: scRNA-seq Analysis to evaluate the reproducibility of Ahmed et al 2020 article

Explore the re-clustering analysis and perform DE analysis for specific cluster

Rajneesh Srivastava

12/18/2020

R Markdown documentation for scRNA data analysis (with SCTransform)

Practice - From data upload to downstream analysis

Create Suerat object

Loop up

Apply sctransform normalization

This single command replaces NormalizeData, ScaleData, and FindVariableFeatures.

Transformed data will be available in the SCT assay, which is set as the default after running sctransform

During normalization, we can also remove confounding sources of variation, for example, mitochondrial mapping percentage

Notes from Seurat webpage

https://satijalab.org/seurat/v3.2/sctransform_vignette.html

To check the distribution of mito-content per sample

Extract a subset of the data [feature selection]

This step is based on the distribution and user defined cutoff for each sample

Do NOT run the “ScaleData” function after integration (in SCT)

Explore the integrated object

Clustering (after integration)

Explore the clusters (Find neighbor, Run tSNE, Find clusters)

TRY

Identify all the markers in identified clusters [objectS20]

Check the status of top 5 marker genes per cluster

Assign cell types to the clusters

DIFFERENTIAL EXPRESSION ANALYSIS: young vs old

Compare specific clusters

Thank you