knitr::opts_chunk$set(echo = TRUE)
#setwd ("C:/Users/rsrivast/Desktop/GSE122960/Dec-Practice/Dec16")
#install.packages("Seurat")
#install.packages("hdf5r")
library(Seurat)
library(dplyr)
library(Matrix)
library(ggplot2)
library(hdf5r)
library(celldex)
library(SingleR)
library(future)
library(sctransform)
plan("multiprocess", workers = 4)
options(future.globals.maxSize = 12000 * 1024^2)
## load filtered scRNA (h5 format) sample
D1<- Read10X_h5(file = "C:/Users/rsrivast/Desktop/GSE122960/Filtered_Data/GSM3489182_Donor_01_filtered_gene_bc_matrices_h5.h5")
D4<- Read10X_h5(file = "C:/Users/rsrivast/Desktop/GSE122960/Filtered_Data/GSM3489189_Donor_04_filtered_gene_bc_matrices_h5.h5")
D3<- Read10X_h5(file = "C:/Users/rsrivast/Desktop/GSE122960/Filtered_Data/GSM3489187_Donor_03_filtered_gene_bc_matrices_h5.h5")
D6<- Read10X_h5(file = "C:/Users/rsrivast/Desktop/GSE122960/Filtered_Data/GSM3489193_Donor_06_filtered_gene_bc_matrices_h5.h5")
D1lungs = CreateSeuratObject(D1, "Old_63y")
D4lungs = CreateSeuratObject(D4, "Old_57y")
D3lungs = CreateSeuratObject(D3, "Young_29y")
D6lungs = CreateSeuratObject(D6, "Young_22y")
#Calculate mitochondrial percentage in each cell
D1lungs[["percent.mt"]] <- PercentageFeatureSet(D1lungs, pattern = "^MT-")
D4lungs[["percent.mt"]] <- PercentageFeatureSet(D4lungs, pattern = "^MT-")
D3lungs[["percent.mt"]] <- PercentageFeatureSet(D3lungs, pattern = "^MT-")
D6lungs[["percent.mt"]] <- PercentageFeatureSet(D6lungs, pattern = "^MT-")
#lungs.list=list(D1lungs, D4lungs, D3lungs, D6lungs)
#for (i in 1:length(lungs.list)) {
# lungs.list[[i]] <- PercentageFeatureSet(lungs.list[[i]],
# pattern = "^MT-", col.name = "percent.mt")
# }
#lungs.list
#head(D1lungs@meta.data)
head(D1lungs@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACCTGAGCGTTTAC-1 Old_63y 7743 2034 4.649361
## AAACCTGAGTCGCCGT-1 Old_63y 22427 4158 5.627146
## AAACCTGGTAGGACAC-1 Old_63y 8346 2204 5.919003
## AAACCTGGTGCCTTGG-1 Old_63y 8669 2298 7.013496
## AAACCTGGTTCAGCGC-1 Old_63y 3477 1300 7.794075
## AAACCTGTCCGTAGTA-1 Old_63y 11400 2815 6.192982
lungs.list=list(D1lungs, D4lungs, D3lungs, D6lungs)
for (i in 1:length(lungs.list)) {
lungs.list[[i]] <- SCTransform(lungs.list[[i]],
vars.to.regress = "percent.mt",
verbose = FALSE)
}
# Generate violin plots
VD.list=list()
for (i in 1:length(lungs.list)) {
VD.list[[i]] <- VlnPlot(lungs.list[[i]], features = c("nFeature_SCT",
"nCount_SCT", "percent.mt"), ncol = 3, pt.size=0)
}
VD.list
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
# PLOT SCATTER
FSD.list=list()
for (i in 1:length(lungs.list)) {
FSD.list[[i]] <- FeatureScatter(lungs.list[[i]], feature1 = 'nFeature_SCT', feature2 = 'percent.mt')
}
CombinePlots(plots=FSD.list)
head(D1lungs@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACCTGAGCGTTTAC-1 Old_63y 7743 2034 4.649361
## AAACCTGAGTCGCCGT-1 Old_63y 22427 4158 5.627146
## AAACCTGGTAGGACAC-1 Old_63y 8346 2204 5.919003
## AAACCTGGTGCCTTGG-1 Old_63y 8669 2298 7.013496
## AAACCTGGTTCAGCGC-1 Old_63y 3477 1300 7.794075
## AAACCTGTCCGTAGTA-1 Old_63y 11400 2815 6.192982
#SSD1 <- subset(sctD1, subset = nFeature_SCT > 200 & nFeature_SCT < 5000 & percent.mt < 15 & nCount_SCT < 25000 & nCount_SCT > 2000)
#SSD4 <- subset(sctD4, subset = nFeature_SCT > 200 & nFeature_SCT < 5000 & percent.mt < 15 & nCount_SCT < 25000 & nCount_SCT > 2000)
#SSD3 <- subset(sctD3, subset = nFeature_SCT > 200 & nFeature_SCT < 5000 & percent.mt < 15 & nCount_SCT < 25000 & nCount_SCT > 2000)
#SSD6 <- subset(sctD6, subset = nFeature_SCT > 200 & nFeature_SCT < 5000 & percent.mt < 15 & nCount_SCT < 25000 & nCount_SCT > 2000)
# this ensures that all necessary Pearson residuals have been calculated.
#DAVE TANG BLOG (https://davetang.org/muse/2017/08/01/getting-started-seurat/)
Data Integration (data combined - SCT)
lungs.features <- SelectIntegrationFeatures(object.list = lungs.list, nfeatures = 2500)
lungs.list <- PrepSCTIntegration(object.list = lungs.list, anchor.features = lungs.features, verbose = FALSE)
lungs.anchors <- FindIntegrationAnchors(object.list = lungs.list, normalization.method = "SCT", anchor.features = lungs.features, verbose = TRUE)
lungs.integrated <- IntegrateData(anchorset = lungs.anchors, normalization.method = "SCT", verbose = TRUE)
head(lungs.integrated@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1 Old_63y 7743 2034 4.649361 8965
## AAACCTGAGTCGCCGT-1_1 Old_63y 22427 4158 5.627146 10411
## AAACCTGGTAGGACAC-1_1 Old_63y 8346 2204 5.919003 9071
## AAACCTGGTGCCTTGG-1_1 Old_63y 8669 2298 7.013496 9150
## AAACCTGGTTCAGCGC-1_1 Old_63y 3477 1300 7.794075 8420
## AAACCTGTCCGTAGTA-1_1 Old_63y 11400 2815 6.192982 10026
## nFeature_SCT
## AAACCTGAGCGTTTAC-1_1 2032
## AAACCTGAGTCGCCGT-1_1 3263
## AAACCTGGTAGGACAC-1_1 2202
## AAACCTGGTGCCTTGG-1_1 2293
## AAACCTGGTTCAGCGC-1_1 1520
## AAACCTGTCCGTAGTA-1_1 2814
DefaultAssay(lungs.integrated)
## [1] "integrated"
head(lungs.integrated@active.assay)
## [1] "integrated"
VlnPlot(lungs.integrated, split.by="orig.ident", "SOD3")
DefaultAssay(lungs.integrated)="RNA"
VlnPlot(lungs.integrated, split.by="orig.ident", "SOD3")
DefaultAssay(lungs.integrated)="SCT"
VlnPlot(lungs.integrated, split.by="orig.ident", "SOD3")
VlnPlot(lungs.integrated, split.by="orig.ident", "SOD3", pt.size = 0)
DefaultAssay(lungs.integrated) <- "integrated"
#object <- ScaleData(object=My.integrated, verbose = TRUE) ## DONT SCALE IT
lungs.integrated <- RunPCA(object = lungs.integrated, verbose = FALSE)
#no PCs defined.
lungs.integrated
## An object of class Seurat
## 55657 features across 20163 samples within 3 assays
## Active assay: integrated (2500 features, 2500 variable features)
## 2 other assays present: RNA, SCT
## 1 dimensional reduction calculated: pca
head(lungs.integrated@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1 Old_63y 7743 2034 4.649361 8965
## AAACCTGAGTCGCCGT-1_1 Old_63y 22427 4158 5.627146 10411
## AAACCTGGTAGGACAC-1_1 Old_63y 8346 2204 5.919003 9071
## AAACCTGGTGCCTTGG-1_1 Old_63y 8669 2298 7.013496 9150
## AAACCTGGTTCAGCGC-1_1 Old_63y 3477 1300 7.794075 8420
## AAACCTGTCCGTAGTA-1_1 Old_63y 11400 2815 6.192982 10026
## nFeature_SCT
## AAACCTGAGCGTTTAC-1_1 2032
## AAACCTGAGTCGCCGT-1_1 3263
## AAACCTGGTAGGACAC-1_1 2202
## AAACCTGGTGCCTTGG-1_1 2293
## AAACCTGGTTCAGCGC-1_1 1520
## AAACCTGTCCGTAGTA-1_1 2814
ElbowPlot(lungs.integrated, ndims = 50)
ElbowPlot(lungs.integrated, ndims = 10)
## If need to check the level of significance for each clusters, run below 3 codes
#object_JS <- JackStraw(lungs.integrated, num.replicate = 100, dims = 50)
#object_JSS <- ScoreJackStraw(object = object_JS, dims = 1:50)
#JackStrawPlot(object = object_JSS, dims = 1:50)
## Explore the top PCs
DimHeatmap(lungs.integrated, dims = 1, cells = 500, balanced=TRUE)
DimHeatmap(lungs.integrated, dims = 2, cells = 500, balanced=TRUE)
DimHeatmap(lungs.integrated, dims = c(1,3), cells = 500, balanced=TRUE)
DimHeatmap(lungs.integrated, dims = 1:3, cells = 500, balanced=TRUE)
DefaultAssay(lungs.integrated)
## [1] "integrated"
lungs.integrated = RunUMAP(lungs.integrated, dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 04:45:39 UMAP embedding parameters a = 0.9922 b = 1.112
## 04:45:39 Read 20163 rows and found 30 numeric columns
## 04:45:39 Using Annoy for neighbor search, n_neighbors = 30
## 04:45:39 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 04:45:42 Writing NN index file to temp file C:\Users\rsrivast\AppData\Local\Temp\1\Rtmpw3rxOC\file4434d861185
## 04:45:42 Searching Annoy index using 4 threads, search_k = 3000
## 04:45:45 Annoy recall = 100%
## 04:45:45 Commencing smooth kNN distance calibration using 4 threads
## 04:45:47 Initializing from normalized Laplacian + noise
## 04:45:48 Commencing optimization for 200 epochs, with 886830 positive edges
## 04:46:10 Optimization finished
DimPlot(lungs.integrated)
DimPlot(lungs.integrated, split.by="orig.ident")
FeaturePlot(lungs.integrated, "SOD3",split.by="orig.ident", cols=c("grey","red","black"))
#RUN tSNE
## Find neighbor using nearest neighbor algorithm & tSNE - "Integrated"
lungs.integrated <- FindNeighbors(lungs.integrated, dims = 1:30) # dim=1:10
## Computing nearest neighbor graph
## Computing SNN
lungs.integrated = RunTSNE(lungs.integrated, dims = 1:30)
DimPlot(lungs.integrated, reduction = "tsne")
DimPlot(lungs.integrated, split.by="orig.ident",reduction = "tsne")
FeaturePlot(lungs.integrated, "SOD3",split.by="orig.ident", cols=c("grey","red","black"),reduction = "tsne")
head(lungs.integrated@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1 Old_63y 7743 2034 4.649361 8965
## AAACCTGAGTCGCCGT-1_1 Old_63y 22427 4158 5.627146 10411
## AAACCTGGTAGGACAC-1_1 Old_63y 8346 2204 5.919003 9071
## AAACCTGGTGCCTTGG-1_1 Old_63y 8669 2298 7.013496 9150
## AAACCTGGTTCAGCGC-1_1 Old_63y 3477 1300 7.794075 8420
## AAACCTGTCCGTAGTA-1_1 Old_63y 11400 2815 6.192982 10026
## nFeature_SCT
## AAACCTGAGCGTTTAC-1_1 2032
## AAACCTGAGTCGCCGT-1_1 3263
## AAACCTGGTAGGACAC-1_1 2202
## AAACCTGGTGCCTTGG-1_1 2293
## AAACCTGGTTCAGCGC-1_1 1520
## AAACCTGTCCGTAGTA-1_1 2814
head(lungs.integrated@active.ident)
## AAACCTGAGCGTTTAC-1_1 AAACCTGAGTCGCCGT-1_1 AAACCTGGTAGGACAC-1_1
## Old_63y Old_63y Old_63y
## AAACCTGGTGCCTTGG-1_1 AAACCTGGTTCAGCGC-1_1 AAACCTGTCCGTAGTA-1_1
## Old_63y Old_63y Old_63y
## Levels: Old_57y Old_63y Young_22y Young_29y
# find clusters (assay:I = integrated)
objectI10 <- FindClusters(lungs.integrated, resolution = 0.10)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 20163
## Number of edges: 748466
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9620
## Number of communities: 10
## Elapsed time: 3 seconds
## Warning: UNRELIABLE VALUE: Future ('future_lapply-1') unexpectedly generated
## random numbers without specifying argument 'seed'. There is a risk that those
## random numbers are not statistically sound and the overall results might be
## invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-
## safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this
## check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore".
AU=DimPlot(objectI10)
AP=DimPlot(objectI10,reduction = "pca")
AT=DimPlot(objectI10,reduction = "tsne")
CombinePlots(list(AU,AP,AT))
## Warning: CombinePlots is being deprecated. Plots should now be combined using
## the patchwork system.
objectI20 <- FindClusters(lungs.integrated, resolution = 0.20)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 20163
## Number of edges: 748466
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9343
## Number of communities: 14
## Elapsed time: 3 seconds
## Warning: UNRELIABLE VALUE: Future ('future_lapply-1') unexpectedly generated
## random numbers without specifying argument 'seed'. There is a risk that those
## random numbers are not statistically sound and the overall results might be
## invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-
## safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this
## check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore".
AU=DimPlot(objectI20)
AP=DimPlot(objectI20,reduction = "pca")
AT=DimPlot(objectI20,reduction = "tsne")
CombinePlots(list(AU,AP,AT))
## Warning: CombinePlots is being deprecated. Plots should now be combined using
## the patchwork system.
objectI30 <- FindClusters(lungs.integrated, resolution = 0.30)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 20163
## Number of edges: 748466
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9161
## Number of communities: 19
## Elapsed time: 3 seconds
## Warning: UNRELIABLE VALUE: Future ('future_lapply-1') unexpectedly generated
## random numbers without specifying argument 'seed'. There is a risk that those
## random numbers are not statistically sound and the overall results might be
## invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-
## safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this
## check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore".
AU=DimPlot(objectI30)
AP=DimPlot(objectI30,reduction = "pca")
AT=DimPlot(objectI30,reduction = "tsne")
CombinePlots(list(AU,AP,AT))
## Warning: CombinePlots is being deprecated. Plots should now be combined using
## the patchwork system.
objectI50 <- FindClusters(lungs.integrated, resolution = 0.50)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 20163
## Number of edges: 748466
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8943
## Number of communities: 25
## Elapsed time: 3 seconds
## Warning: UNRELIABLE VALUE: Future ('future_lapply-1') unexpectedly generated
## random numbers without specifying argument 'seed'. There is a risk that those
## random numbers are not statistically sound and the overall results might be
## invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-
## safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this
## check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore".
AU=DimPlot(objectI50)
AP=DimPlot(objectI50,reduction = "pca")
AT=DimPlot(objectI50,reduction = "tsne")
CombinePlots(list(AU,AP,AT))
## Warning: CombinePlots is being deprecated. Plots should now be combined using
## the patchwork system.
objectI75 <- FindClusters(lungs.integrated, resolution = 0.75)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 20163
## Number of edges: 748466
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8750
## Number of communities: 26
## Elapsed time: 3 seconds
## Warning: UNRELIABLE VALUE: Future ('future_lapply-1') unexpectedly generated
## random numbers without specifying argument 'seed'. There is a risk that those
## random numbers are not statistically sound and the overall results might be
## invalid. To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-
## safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this
## check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore".
AU=DimPlot(objectI75)
AP=DimPlot(objectI75,reduction = "pca")
AT=DimPlot(objectI75,reduction = "tsne")
CombinePlots(list(AU,AP,AT))
## Warning: CombinePlots is being deprecated. Plots should now be combined using
## the patchwork system.
#plots <- DimPlot(objectI75, combine = F, group.by="seurat_clusters")
#plots <- lapply(X = plots, FUN = function(x) x + theme(legend.position = "top") + guides(color = guide_legend(nrow = 4, byrow = TRUE, override.aes = list(size = 4))))
#CombinePlots(plots)
## Find average exression of integrated object
I = AverageExpression(objectI30, return.seurat = FALSE, verbose = TRUE)
## Finished averaging RNA for cluster 0
## Finished averaging RNA for cluster 1
## Finished averaging RNA for cluster 2
## Finished averaging RNA for cluster 3
## Finished averaging RNA for cluster 4
## Finished averaging RNA for cluster 5
## Finished averaging RNA for cluster 6
## Finished averaging RNA for cluster 7
## Finished averaging RNA for cluster 8
## Finished averaging RNA for cluster 9
## Finished averaging RNA for cluster 10
## Finished averaging RNA for cluster 11
## Finished averaging RNA for cluster 12
## Finished averaging RNA for cluster 13
## Finished averaging RNA for cluster 14
## Finished averaging RNA for cluster 15
## Finished averaging RNA for cluster 16
## Finished averaging RNA for cluster 17
## Finished averaging RNA for cluster 18
## Finished averaging SCT for cluster 0
## Finished averaging SCT for cluster 1
## Finished averaging SCT for cluster 2
## Finished averaging SCT for cluster 3
## Finished averaging SCT for cluster 4
## Finished averaging SCT for cluster 5
## Finished averaging SCT for cluster 6
## Finished averaging SCT for cluster 7
## Finished averaging SCT for cluster 8
## Finished averaging SCT for cluster 9
## Finished averaging SCT for cluster 10
## Finished averaging SCT for cluster 11
## Finished averaging SCT for cluster 12
## Finished averaging SCT for cluster 13
## Finished averaging SCT for cluster 14
## Finished averaging SCT for cluster 15
## Finished averaging SCT for cluster 16
## Finished averaging SCT for cluster 17
## Finished averaging SCT for cluster 18
## Finished averaging integrated for cluster 0
## Finished averaging integrated for cluster 1
## Finished averaging integrated for cluster 2
## Finished averaging integrated for cluster 3
## Finished averaging integrated for cluster 4
## Finished averaging integrated for cluster 5
## Finished averaging integrated for cluster 6
## Finished averaging integrated for cluster 7
## Finished averaging integrated for cluster 8
## Finished averaging integrated for cluster 9
## Finished averaging integrated for cluster 10
## Finished averaging integrated for cluster 11
## Finished averaging integrated for cluster 12
## Finished averaging integrated for cluster 13
## Finished averaging integrated for cluster 14
## Finished averaging integrated for cluster 15
## Finished averaging integrated for cluster 16
## Finished averaging integrated for cluster 17
## Finished averaging integrated for cluster 18
head (I$integrated)
## 0 1 2 3 4 5
## SFTPC -0.7577971 7520.93451 4441.38065 8314.7081 7030.527554 -0.1145684
## SCGB3A2 20.8001658 1097.62823 2743.64639 171972.4259 31827.798650 33.4123979
## SFTPA1 -0.9924176 11414.10460 230.78176 7007.2059 7686.698931 -0.5249207
## SFTPA2 -0.9890068 9577.65346 62.63899 9076.0280 6265.392279 -0.8457792
## SFTPB -0.9976907 6765.54315 5315.30905 6515.0541 5885.278670 -0.2626763
## SPARCL1 233.5700241 68.57914 50.12627 192.4844 2.476644 549.8199419
## 6 7 8 9 10
## SFTPC -0.9987619 9824.7518918 33.7171912 746.77442 514.2621
## SCGB3A2 -0.8932550 6114.4667401 11.9692361 138758.81952 558.4839
## SFTPA1 -0.9950266 2425.3277670 8.1081240 59.35526 341.5823
## SFTPA2 -0.9918993 3476.4300963 -0.9027219 180.27893 167.6429
## SFTPB -0.9990280 1561.1699410 -0.8205104 2548.91917 338.2328
## SPARCL1 307.2645181 0.6591703 384.5434449 5452.00806 683898.1073
## 11 12 13 14 15 16
## SFTPC 2470.61403 69.40604 6947.514 35.1313896 489.339273 5.768162e+00
## SCGB3A2 5069.28376 41567.94100 6202.421 -0.8918727 10.189210 5.080986e+02
## SFTPA1 3032.74207 26.56835 3297.218 0.5093533 272.844071 1.118693e+01
## SFTPA2 1941.81013 20.10803 3128.639 3.1897104 383.336544 1.159158e+01
## SFTPB 4587.76935 160.54946 1145.892 -0.9674545 666.250234 6.463162e+00
## SPARCL1 19.03299 4212.82989 4211.095 261.8149211 0.455411 3.200536e+05
## 17 18
## SFTPC -0.99930252 1.183632e+03
## SCGB3A2 416.10183968 1.158033e+04
## SFTPA1 -0.99602740 2.258294e-01
## SFTPA2 -0.99354773 1.453671e+01
## SFTPB -0.99564600 2.503477e+02
## SPARCL1 -0.06841855 1.127384e+04
head (I$RNA)
## 0 1 2 3 4
## RP11-34P13.3 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## FAM138A 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## OR4F5 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## RP11-34P13.7 0.003971184 0.003953378 0.002627342 0.004737909 0.004028797
## RP11-34P13.8 0.000000000 0.000359398 0.000000000 0.000000000 0.000000000
## RP11-34P13.14 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## 5 6 7 8 9 10 11
## RP11-34P13.3 0.000000000 0.000000000 0.000000000 0 0.000000000 0 0
## FAM138A 0.000000000 0.000000000 0.000000000 0 0.000000000 0 0
## OR4F5 0.000000000 0.000000000 0.000000000 0 0.000000000 0 0
## RP11-34P13.7 0.004145433 0.002098024 0.003158606 0 0.005352903 0 0
## RP11-34P13.8 0.002072716 0.000000000 0.000000000 0 0.000000000 0 0
## RP11-34P13.14 0.000000000 0.000000000 0.000000000 0 0.000000000 0 0
## 12 13 14 15 16 17 18
## RP11-34P13.3 0.000000000 0 0.000000000 0 0 0 0
## FAM138A 0.000000000 0 0.000000000 0 0 0 0
## OR4F5 0.000000000 0 0.000000000 0 0 0 0
## RP11-34P13.7 0.007775031 0 0.008766744 0 0 0 0
## RP11-34P13.8 0.000000000 0 0.000000000 0 0 0 0
## RP11-34P13.14 0.000000000 0 0.000000000 0 0 0 0
head(I$SCT)
## 0 1 2 3 4
## RP11-34P13.7 0.0021666907 0.002300774 0.001529052 0.002757353 0.002344666
## FO538757.2 0.1821464683 0.172976365 0.193679918 0.211397059 0.196951934
## AP006222.2 0.1385237614 0.037230705 0.044342508 0.036764706 0.026963658
## RP4-669L17.10 0.0028889210 0.003764903 0.002548420 0.003676471 0.001172333
## RP5-857K21.4 0.0002888921 0.001673290 0.000509684 0.002757353 0.003516999
## RP11-206L10.9 0.0169001878 0.033884125 0.022935780 0.027573529 0.031652989
## 5 6 7 8 9
## RP11-34P13.7 0.002412545 0.001221001 0.001838235 0.00000000 0.003115265
## FO538757.2 0.213510253 0.191697192 0.327205882 0.13232104 0.161993769
## AP006222.2 0.034981906 0.152625153 0.066176471 0.01084599 0.015576324
## RP4-669L17.10 0.002412545 0.001221001 0.003676471 0.00000000 0.000000000
## RP5-857K21.4 0.001206273 0.000000000 0.000000000 0.00000000 0.000000000
## RP11-206L10.9 0.015681544 0.013431013 0.031250000 0.01952278 0.009345794
## 10 11 12 13 14
## RP11-34P13.7 0.00000000 0.00000000 0.00000000 0.00000000 0.005102041
## FO538757.2 0.16014235 0.16605166 0.19457014 0.25242718 0.224489796
## AP006222.2 0.05338078 0.03690037 0.01809955 0.12621359 0.173469388
## RP4-669L17.10 0.00000000 0.00000000 0.01357466 0.01456311 0.000000000
## RP5-857K21.4 0.00000000 0.00000000 0.00000000 0.00000000 0.000000000
## RP11-206L10.9 0.00000000 0.02214022 0.03619910 0.05825243 0.020408163
## 15 16 17 18
## RP11-34P13.7 0.000000000 0.00000000 0.00000000 0.00000000
## FO538757.2 0.271794872 0.25581395 0.29230769 0.22950820
## AP006222.2 0.194871795 0.03488372 0.15384615 0.01639344
## RP4-669L17.10 0.005128205 0.02325581 0.00000000 0.00000000
## RP5-857K21.4 0.000000000 0.00000000 0.00000000 0.00000000
## RP11-206L10.9 0.035897436 0.03488372 0.03076923 0.01639344
Idents(objectI30)="orig.ident"
DefaultAssay(objectI30)="SCT"
DoHeatmap(objectI30,features=c("SOD3","LRRK2"))
DefaultAssay(objectI30)="integrated"
DoHeatmap(objectI30,features=c("SOD3","LRRK2"))
Idents(objectI30)="seurat_clusters"
#saveRDS(object1,"Object1_10X.rds")
#Trick for assigning additional column. For instance - condition
DefaultAssay(objectI30)="SCT"
head(objectI30@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1 Old_63y 7743 2034 4.649361 8965
## AAACCTGAGTCGCCGT-1_1 Old_63y 22427 4158 5.627146 10411
## AAACCTGGTAGGACAC-1_1 Old_63y 8346 2204 5.919003 9071
## AAACCTGGTGCCTTGG-1_1 Old_63y 8669 2298 7.013496 9150
## AAACCTGGTTCAGCGC-1_1 Old_63y 3477 1300 7.794075 8420
## AAACCTGTCCGTAGTA-1_1 Old_63y 11400 2815 6.192982 10026
## nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1 2032 1 1
## AAACCTGAGTCGCCGT-1_1 3263 4 4
## AAACCTGGTAGGACAC-1_1 2202 2 2
## AAACCTGGTGCCTTGG-1_1 2293 1 1
## AAACCTGGTTCAGCGC-1_1 1520 9 9
## AAACCTGTCCGTAGTA-1_1 2814 3 3
objectI30$condition = 1
temp = objectI30@meta.data
x = dim(temp)[1]
for (i in 1:x)
{
if(temp[i,1] == 'Old_57y' || temp[i,1] == 'Old_63y')
{
temp[i,9] = "Old"
}
else
{
temp[i,9] = "Young"
}
}
objectI30@meta.data=temp
head(objectI30@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1 Old_63y 7743 2034 4.649361 8965
## AAACCTGAGTCGCCGT-1_1 Old_63y 22427 4158 5.627146 10411
## AAACCTGGTAGGACAC-1_1 Old_63y 8346 2204 5.919003 9071
## AAACCTGGTGCCTTGG-1_1 Old_63y 8669 2298 7.013496 9150
## AAACCTGGTTCAGCGC-1_1 Old_63y 3477 1300 7.794075 8420
## AAACCTGTCCGTAGTA-1_1 Old_63y 11400 2815 6.192982 10026
## nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1 2032 1 1
## AAACCTGAGTCGCCGT-1_1 3263 4 4
## AAACCTGGTAGGACAC-1_1 2202 2 2
## AAACCTGGTGCCTTGG-1_1 2293 1 1
## AAACCTGGTTCAGCGC-1_1 1520 9 9
## AAACCTGTCCGTAGTA-1_1 2814 3 3
## condition
## AAACCTGAGCGTTTAC-1_1 Old
## AAACCTGAGTCGCCGT-1_1 Old
## AAACCTGGTAGGACAC-1_1 Old
## AAACCTGGTGCCTTGG-1_1 Old
## AAACCTGGTTCAGCGC-1_1 Old
## AAACCTGTCCGTAGTA-1_1 Old
VlnPlot(objectI30, "SOD3", split.by = "orig.ident")
VlnPlot(objectI30, "SOD3", split.by = "condition")
#plots <- DimPlot(lungs.integrated, "SOD3", group.by = c("orig.ident","condition"), combine = F)
#plots <- lapply(X = plots, FUN = function(x) x + theme(legend.position = "top") + guides(color = guide_legend(nrow = 4, byrow = TRUE, override.aes = list(size = 4))))
#CombinePlots(plots)
DefaultAssay(objectI30)="SCT"
objectI30
## An object of class Seurat
## 55657 features across 20163 samples within 3 assays
## Active assay: SCT (19463 features, 0 variable features)
## 2 other assays present: RNA, integrated
## 3 dimensional reductions calculated: pca, umap, tsne
objectI30_markers=FindAllMarkers(objectI30,only.pos = T,logfc.threshold = 0.25, min.pct = 0.10)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 17
## Calculating cluster 18
tail(objectI30_markers)
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## CLK1 0.006228930 0.2517279 0.393 0.281 1 18 CLK1
## CRBN 0.006843296 0.2553573 0.295 0.196 1 18 CRBN
## PLP22 0.006845073 0.2839817 0.443 0.323 1 18 PLP2
## NOP561 0.008829589 0.2536799 0.295 0.197 1 18 NOP56
## AHNAK5 0.008845046 0.3642356 0.508 0.448 1 18 AHNAK
## HNRNPK1 0.009698877 0.2818846 0.836 0.691 1 18 HNRNPK
FeaturePlot(objectI30, features = c("C1QB","C1QA","FABP4","SOD3"), cols=c("grey","red", "black"))
FeaturePlot(objectI30, features = c("C1QB","C1QA","FABP4","SOD3"), cols=c("grey","red", "black"),reduction = "tsne")
#write.table(objectI30_markers,"All_marker_Clusters.txt") # Explore the table
## Random try
#DefaultAssay(objectI30)="SCT"
FeaturePlot(objectI30, features = "SOD3",cols=c("grey","red", "black"))
FeaturePlot(objectI30, features = "SOD3",cols=c("grey","red", "black"), split.by="orig.ident")
FeaturePlot(objectI30, features = c("SOD3", "TMPRSS2"),cols=c("grey","red", "black"))
FeaturePlot(objectI30, features = c("SOD3", "TMPRSS2"), split.by = "orig.ident", cols=c("grey","red", "black"))
VlnPlot(objectI30, features = c("SOD3", "TMPRSS2"), split.by = "orig.ident")
Idents(objectI30)="condition"
FeaturePlot(objectI30, features = c("SOD3", "TMPRSS2"),cols=c("grey","red", "black"))
FeaturePlot(objectI30, features = c("SOD3", "TMPRSS2"), split.by = "condition", cols=c("grey","red", "black"))
VlnPlot(objectI30, features = c("SOD3", "TMPRSS2"), split.by = "orig.ident")
## Note: scaling up is an important step, if not done previously, can be implemented for selected genes
Idents(objectI30)="seurat_clusters"
top5 <- objectI30_markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
head(top5)
## # A tibble: 6 x 7
## # Groups: cluster [2]
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 1.86 0.996 0.479 0 0 C1QB
## 2 0 1.77 0.993 0.438 0 0 C1QA
## 3 0 1.74 0.999 0.783 0 0 APOC1
## 4 0 1.62 0.939 0.355 0 0 FABP4
## 5 0 1.59 0.982 0.389 0 0 CD52
## 6 0 1.49 1 0.801 0 1 SFTPA1
DefaultAssay(objectI30)
## [1] "SCT"
#T5 <- ScaleData(object1, features = top5$gene)
DoHeatmap(objectI30, features = top5$gene) + NoLegend()
Idents(objectI30)="condition"
DoHeatmap(objectI30, features = top5$gene) + NoLegend()
Idents(objectI30)="orig.ident"
DoHeatmap(objectI30, features = top5$gene) + NoLegend()
Data Integration (data combined)
table(objectI30@meta.data$seurat_clusters,objectI30@meta.data$condition)
##
## Old Young
## 0 3454 3469
## 1 2709 2072
## 2 1310 652
## 3 801 287
## 4 480 373
## 5 218 611
## 6 375 444
## 7 206 338
## 8 135 326
## 9 161 160
## 10 138 143
## 11 147 124
## 12 126 95
## 13 117 89
## 14 87 109
## 15 63 132
## 16 28 58
## 17 32 33
## 18 41 20
head(objectI30@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1 Old_63y 7743 2034 4.649361 8965
## AAACCTGAGTCGCCGT-1_1 Old_63y 22427 4158 5.627146 10411
## AAACCTGGTAGGACAC-1_1 Old_63y 8346 2204 5.919003 9071
## AAACCTGGTGCCTTGG-1_1 Old_63y 8669 2298 7.013496 9150
## AAACCTGGTTCAGCGC-1_1 Old_63y 3477 1300 7.794075 8420
## AAACCTGTCCGTAGTA-1_1 Old_63y 11400 2815 6.192982 10026
## nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1 2032 1 1
## AAACCTGAGTCGCCGT-1_1 3263 4 4
## AAACCTGGTAGGACAC-1_1 2202 2 2
## AAACCTGGTGCCTTGG-1_1 2293 1 1
## AAACCTGGTTCAGCGC-1_1 1520 9 9
## AAACCTGTCCGTAGTA-1_1 2814 3 3
## condition
## AAACCTGAGCGTTTAC-1_1 Old
## AAACCTGAGTCGCCGT-1_1 Old
## AAACCTGGTAGGACAC-1_1 Old
## AAACCTGGTGCCTTGG-1_1 Old
## AAACCTGGTTCAGCGC-1_1 Old
## AAACCTGTCCGTAGTA-1_1 Old
objectI30$Cond_Clus = 1
objectI30$Cond_Clus = paste(objectI30$condition, objectI30$seurat_clusters, sep = "_")
#table(objectI30@meta.data$seurat_clusters,objectI30@meta.data$condition)
Idents(objectI30)="seurat_clusters"
hpca <- HumanPrimaryCellAtlasData()
singler_2=SingleR(de.method ="classic", test=objectI30@assays$RNA@data, clusters = objectI30@meta.data$seurat_clusters, genes = "de", quantile = 0.8, tune.thresh = 0.05, fine.tune = TRUE, sd.thresh = 1, ref=hpca@assays@data$logcounts,labels=hpca$label.main)
label_cells=singler_2$first.labels
label_cells
## [1] "Macrophage" "Epithelial_cells" "Epithelial_cells"
## [4] "Epithelial_cells" "Epithelial_cells" "Monocyte"
## [7] "Macrophage" "Epithelial_cells" "NK_cell"
## [10] "Epithelial_cells" "Endothelial_cells" "Epithelial_cells"
## [13] "Epithelial_cells" "Macrophage" "Macrophage"
## [16] "Macrophage" "Fibroblasts" "Endothelial_cells"
## [19] "Macrophage"
Idents(objectI30)=plyr::mapvalues(Idents(objectI30),from = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18), to = label_cells)
DimPlot(objectI30,split.by="condition", reduction = "tsne")
FeaturePlot(objectI30,"SOD3", label=T, cols=c("grey","orange","red","black"),reduction = "tsne")
FeaturePlot(objectI30,"SOD3", split.by="condition", label=T, cols = c("grey","orange","red","black"),reduction = "tsne")
VlnPlot(objectI30, split.by="condition","SOD3")
Idents(objectI30)="condition"
DefaultAssay(objectI30)="SCT"
head(objectI30@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1 Old_63y 7743 2034 4.649361 8965
## AAACCTGAGTCGCCGT-1_1 Old_63y 22427 4158 5.627146 10411
## AAACCTGGTAGGACAC-1_1 Old_63y 8346 2204 5.919003 9071
## AAACCTGGTGCCTTGG-1_1 Old_63y 8669 2298 7.013496 9150
## AAACCTGGTTCAGCGC-1_1 Old_63y 3477 1300 7.794075 8420
## AAACCTGTCCGTAGTA-1_1 Old_63y 11400 2815 6.192982 10026
## nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1 2032 1 1
## AAACCTGAGTCGCCGT-1_1 3263 4 4
## AAACCTGGTAGGACAC-1_1 2202 2 2
## AAACCTGGTGCCTTGG-1_1 2293 1 1
## AAACCTGGTTCAGCGC-1_1 1520 9 9
## AAACCTGTCCGTAGTA-1_1 2814 3 3
## condition Cond_Clus
## AAACCTGAGCGTTTAC-1_1 Old Old_1
## AAACCTGAGTCGCCGT-1_1 Old Old_4
## AAACCTGGTAGGACAC-1_1 Old Old_2
## AAACCTGGTGCCTTGG-1_1 Old Old_1
## AAACCTGGTTCAGCGC-1_1 Old Old_9
## AAACCTGTCCGTAGTA-1_1 Old Old_3
objectI30_DEGs <- FindMarkers(objectI30, ident.1 = "Young", ident.2 = "Old",logfc.threshold = 0.25, min.pct=0.30)
head (objectI30_DEGs,50)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## APOE 0.000000e+00 1.0764221 0.630 0.312 0.000000e+00
## RP11-1143G9.4 0.000000e+00 0.6137343 0.412 0.148 0.000000e+00
## SNHG25 0.000000e+00 0.6080597 0.815 0.516 0.000000e+00
## PABPC4 0.000000e+00 0.4904915 0.846 0.578 0.000000e+00
## XIST 0.000000e+00 0.4835448 0.754 0.371 0.000000e+00
## CHCHD10 0.000000e+00 0.4769061 0.536 0.230 0.000000e+00
## MT-ATP8 0.000000e+00 0.4266514 0.630 0.394 0.000000e+00
## ATP5D 0.000000e+00 0.4093635 0.629 0.431 0.000000e+00
## EIF2S2 0.000000e+00 0.3899471 0.891 0.730 0.000000e+00
## CBX3 0.000000e+00 0.3839203 0.796 0.569 0.000000e+00
## TBCA 0.000000e+00 0.3329524 0.950 0.857 0.000000e+00
## MT-ND3 0.000000e+00 -0.3256366 0.998 1.000 0.000000e+00
## FOSB 0.000000e+00 -0.3537806 0.245 0.497 0.000000e+00
## CD9 0.000000e+00 -0.3835476 0.831 0.923 0.000000e+00
## MT-CO3 0.000000e+00 -0.4184467 0.998 1.000 0.000000e+00
## SOCS3 0.000000e+00 -0.4297941 0.196 0.482 0.000000e+00
## MT-ND4 0.000000e+00 -0.4438800 1.000 1.000 0.000000e+00
## MT-CO2 0.000000e+00 -0.4475630 0.999 1.000 0.000000e+00
## RPS10 0.000000e+00 -0.4789120 0.983 0.993 0.000000e+00
## MT-ATP6 0.000000e+00 -0.4998865 0.999 1.000 0.000000e+00
## MT-CYB 0.000000e+00 -0.5529803 0.998 1.000 0.000000e+00
## RHOB 0.000000e+00 -0.5540921 0.395 0.689 0.000000e+00
## JUNB 0.000000e+00 -0.5569892 0.474 0.772 0.000000e+00
## HSP90AA1 0.000000e+00 -0.5578679 0.889 0.970 0.000000e+00
## KLF6 0.000000e+00 -0.6010049 0.742 0.930 0.000000e+00
## SFTPC 0.000000e+00 -0.6100808 0.974 0.993 0.000000e+00
## ZFP36 0.000000e+00 -0.6174962 0.554 0.866 0.000000e+00
## SCGB3A2 0.000000e+00 -0.6376392 0.227 0.472 0.000000e+00
## FOS 0.000000e+00 -0.6391686 0.730 0.955 0.000000e+00
## HSPA1B 0.000000e+00 -0.6977429 0.103 0.431 0.000000e+00
## JUN 0.000000e+00 -0.7589664 0.543 0.847 0.000000e+00
## HSPA1A 0.000000e+00 -0.8406444 0.156 0.466 0.000000e+00
## HLA-DRB5 0.000000e+00 -0.8869007 0.418 0.791 0.000000e+00
## SLPI 0.000000e+00 -0.8980185 0.733 0.821 0.000000e+00
## RPS4Y1 0.000000e+00 -1.2249332 0.001 0.501 0.000000e+00
## RPS27L 4.566876e-303 0.3585116 0.963 0.863 8.888510e-299
## DDX5 6.809032e-302 -0.2618043 0.877 0.964 1.325242e-297
## NPC2 1.363993e-290 -0.2619844 0.960 0.980 2.654740e-286
## ATP5I 4.258358e-280 0.3510069 0.938 0.861 8.288041e-276
## CXCL2 4.240879e-270 -0.4455831 0.373 0.607 8.254024e-266
## FABP5 1.371030e-269 -0.4519820 0.878 0.919 2.668436e-265
## GADD45B 2.124888e-268 -0.4879368 0.307 0.527 4.135669e-264
## NFKBIA 5.901474e-259 -0.3687697 0.706 0.849 1.148604e-254
## RPS11 2.981106e-249 -0.4309405 0.997 0.995 5.802126e-245
## HLA-B 2.743128e-248 0.3528311 0.976 0.901 5.338951e-244
## ELF3 2.153854e-239 -0.4552607 0.227 0.430 4.192045e-235
## MT-ND2 2.189784e-238 -0.3792294 1.000 1.000 4.261976e-234
## AGR2 5.233998e-234 -0.3950638 0.188 0.386 1.018693e-229
## MT-CO1 1.517712e-233 -0.6005171 1.000 1.000 2.953923e-229
## RPS20 2.215862e-229 -0.3786682 1.000 1.000 4.312733e-225
#LATER
head(objectI30@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1 Old_63y 7743 2034 4.649361 8965
## AAACCTGAGTCGCCGT-1_1 Old_63y 22427 4158 5.627146 10411
## AAACCTGGTAGGACAC-1_1 Old_63y 8346 2204 5.919003 9071
## AAACCTGGTGCCTTGG-1_1 Old_63y 8669 2298 7.013496 9150
## AAACCTGGTTCAGCGC-1_1 Old_63y 3477 1300 7.794075 8420
## AAACCTGTCCGTAGTA-1_1 Old_63y 11400 2815 6.192982 10026
## nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1 2032 1 1
## AAACCTGAGTCGCCGT-1_1 3263 4 4
## AAACCTGGTAGGACAC-1_1 2202 2 2
## AAACCTGGTGCCTTGG-1_1 2293 1 1
## AAACCTGGTTCAGCGC-1_1 1520 9 9
## AAACCTGTCCGTAGTA-1_1 2814 3 3
## condition Cond_Clus
## AAACCTGAGCGTTTAC-1_1 Old Old_1
## AAACCTGAGTCGCCGT-1_1 Old Old_4
## AAACCTGGTAGGACAC-1_1 Old Old_2
## AAACCTGGTGCCTTGG-1_1 Old Old_1
## AAACCTGGTTCAGCGC-1_1 Old Old_9
## AAACCTGTCCGTAGTA-1_1 Old Old_3
cluster1=subset(objectI30,subset=seurat_clusters==1)
#Cluster the cells by specific pathways [Next plan]
# Ex 80 genes, Cluster the cells
head(cluster1)
## An object of class Seurat
## 12 features across 4781 samples within 2 assays
## Active assay: SCT (6 features, 0 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: umap, tsne
DimPlot(cluster1, split.by="orig.ident")
DimPlot(cluster1, split.by="condition")
Idents(cluster1)="condition"
head(cluster1@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1 Old_63y 7743 2034 4.649361 8965
## AAACCTGGTGCCTTGG-1_1 Old_63y 8669 2298 7.013496 9150
## AAACGGGAGCCGATTT-1_1 Old_63y 8977 2216 7.708589 9260
## AAACGGGAGGTTCCTA-1_1 Old_63y 10164 2427 4.338843 9720
## AAACGGGGTCCTCCAT-1_1 Old_63y 13847 2919 4.759154 10216
## AAAGATGCAAGGGTCA-1_1 Old_63y 7322 1651 7.456979 8974
## nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1 2032 1 1
## AAACCTGGTGCCTTGG-1_1 2293 1 1
## AAACGGGAGCCGATTT-1_1 2213 1 1
## AAACGGGAGGTTCCTA-1_1 2426 1 1
## AAACGGGGTCCTCCAT-1_1 2890 1 1
## AAAGATGCAAGGGTCA-1_1 1650 1 1
## condition Cond_Clus
## AAACCTGAGCGTTTAC-1_1 Old Old_1
## AAACCTGGTGCCTTGG-1_1 Old Old_1
## AAACGGGAGCCGATTT-1_1 Old Old_1
## AAACGGGAGGTTCCTA-1_1 Old Old_1
## AAACGGGGTCCTCCAT-1_1 Old Old_1
## AAAGATGCAAGGGTCA-1_1 Old Old_1
Y=subset(cluster1,subset=condition=="Young")
O=subset(cluster1,subset=condition=="Old")
mergeClus1=merge(x = Y, y = O)
head(mergeClus1@active.ident)
## AAACCTGCAAGTAATG-1_3 AAACGGGTCGCTGATA-1_3 AAAGATGTCAGCACAT-1_3
## Young Young Young
## AAAGCAAAGATGTGGC-1_3 AAAGCAAAGCCGCCTA-1_3 AAAGCAACAAAGGAAG-1_3
## Young Young Young
## Levels: Old Young
head(mergeClus1@active.assay)
## [1] "SCT"
DefaultAssay(mergeClus1) <- "SCT"
head(mergeClus1@active.assay)
## [1] "SCT"
FAmergeClus1 <- FindVariableFeatures(object = mergeClus1, selection.method = "vst", nfeatures = 2000)
FAmergeClus1 <- ScaleData(FAmergeClus1, verbose = TRUE)
FAClus1 <- RunPCA(object = FAmergeClus1, npcs = 50, verbose = FALSE)
DimHeatmap(object = FAClus1, dims = 1, cells = 500, balanced=TRUE)
ElbowPlot(FAClus1, ndims = 50)
FAClus1 <- FindNeighbors(object = FAClus1, dims = 1:5)
FAClus1= RunTSNE(FAClus1, dims=1:5)
head (FAClus1@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGCAAGTAATG-1_3 Young_29y 1416 737 9.816384 3237
## AAACGGGTCGCTGATA-1_3 Young_29y 5103 2038 2.900255 4278
## AAAGATGTCAGCACAT-1_3 Young_29y 2980 1244 2.382550 3424
## AAAGCAAAGATGTGGC-1_3 Young_29y 3817 1537 2.279277 3806
## AAAGCAAAGCCGCCTA-1_3 Young_29y 4412 1736 3.014506 4066
## AAAGCAACAAAGGAAG-1_3 Young_29y 4240 1709 6.438679 3999
## nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGCAAGTAATG-1_3 850 1 1
## AAACGGGTCGCTGATA-1_3 2029 1 1
## AAAGATGTCAGCACAT-1_3 1242 1 1
## AAAGCAAAGATGTGGC-1_3 1536 1 1
## AAAGCAAAGCCGCCTA-1_3 1735 1 1
## AAAGCAACAAAGGAAG-1_3 1705 1 1
## condition Cond_Clus
## AAACCTGCAAGTAATG-1_3 Young Young_1
## AAACGGGTCGCTGATA-1_3 Young Young_1
## AAAGATGTCAGCACAT-1_3 Young Young_1
## AAAGCAAAGATGTGGC-1_3 Young Young_1
## AAAGCAAAGCCGCCTA-1_3 Young Young_1
## AAAGCAACAAAGGAAG-1_3 Young Young_1
Idents(cluster1)="seurat_clusters"
DimPlot(FAClus1)
DimPlot(FAClus1, split.by="orig.ident")
FeaturePlot(FAClus1,"SOD3", split.by="condition")
FeaturePlot(FAClus1,"SOD3", cols=c("grey","orange","red"))
FeaturePlot(FAClus1,"SOD3", split.by="orig.ident", cols=c("grey","orange","red"))
#TRY ONE MORE ATTEMPT WITH
Idents(cluster1)="seurat_clusters"
head(cluster1@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGAGCGTTTAC-1_1 Old_63y 7743 2034 4.649361 8965
## AAACCTGGTGCCTTGG-1_1 Old_63y 8669 2298 7.013496 9150
## AAACGGGAGCCGATTT-1_1 Old_63y 8977 2216 7.708589 9260
## AAACGGGAGGTTCCTA-1_1 Old_63y 10164 2427 4.338843 9720
## AAACGGGGTCCTCCAT-1_1 Old_63y 13847 2919 4.759154 10216
## AAAGATGCAAGGGTCA-1_1 Old_63y 7322 1651 7.456979 8974
## nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGCGTTTAC-1_1 2032 1 1
## AAACCTGGTGCCTTGG-1_1 2293 1 1
## AAACGGGAGCCGATTT-1_1 2213 1 1
## AAACGGGAGGTTCCTA-1_1 2426 1 1
## AAACGGGGTCCTCCAT-1_1 2890 1 1
## AAAGATGCAAGGGTCA-1_1 1650 1 1
## condition Cond_Clus
## AAACCTGAGCGTTTAC-1_1 Old Old_1
## AAACCTGGTGCCTTGG-1_1 Old Old_1
## AAACGGGAGCCGATTT-1_1 Old Old_1
## AAACGGGAGGTTCCTA-1_1 Old Old_1
## AAACGGGGTCCTCCAT-1_1 Old Old_1
## AAAGATGCAAGGGTCA-1_1 Old Old_1
Y=subset(cluster1,subset=condition=="Young")
O=subset(cluster1,subset=condition=="Old")
mergeClus1=merge(x = Y, y = O)
head(mergeClus1@active.ident)
## AAACCTGCAAGTAATG-1_3 AAACGGGTCGCTGATA-1_3 AAAGATGTCAGCACAT-1_3
## 1 1 1
## AAAGCAAAGATGTGGC-1_3 AAAGCAAAGCCGCCTA-1_3 AAAGCAACAAAGGAAG-1_3
## 1 1 1
## Levels: 1
head(mergeClus1@active.assay)
## [1] "SCT"
DefaultAssay(mergeClus1) <- "SCT"
head(mergeClus1@active.assay)
## [1] "SCT"
FAmergeClus1 <- FindVariableFeatures(object = mergeClus1, selection.method = "vst", nfeatures = 2000)
FAmergeClus1 <- ScaleData(FAmergeClus1, verbose = TRUE)
FAClus1 <- RunPCA(object = FAmergeClus1, npcs = 50, verbose = FALSE)
DimHeatmap(object = FAClus1, dims = 1, cells = 500, balanced=TRUE)
ElbowPlot(FAClus1, ndims = 50)
FAClus1 <- FindNeighbors(object = FAClus1, dims = 1:3)
FAClus1= RunTSNE(FAClus1, dims=1:3)
head (FAClus1@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt nCount_SCT
## AAACCTGCAAGTAATG-1_3 Young_29y 1416 737 9.816384 3237
## AAACGGGTCGCTGATA-1_3 Young_29y 5103 2038 2.900255 4278
## AAAGATGTCAGCACAT-1_3 Young_29y 2980 1244 2.382550 3424
## AAAGCAAAGATGTGGC-1_3 Young_29y 3817 1537 2.279277 3806
## AAAGCAAAGCCGCCTA-1_3 Young_29y 4412 1736 3.014506 4066
## AAAGCAACAAAGGAAG-1_3 Young_29y 4240 1709 6.438679 3999
## nFeature_SCT integrated_snn_res.0.3 seurat_clusters
## AAACCTGCAAGTAATG-1_3 850 1 1
## AAACGGGTCGCTGATA-1_3 2029 1 1
## AAAGATGTCAGCACAT-1_3 1242 1 1
## AAAGCAAAGATGTGGC-1_3 1536 1 1
## AAAGCAAAGCCGCCTA-1_3 1735 1 1
## AAAGCAACAAAGGAAG-1_3 1705 1 1
## condition Cond_Clus
## AAACCTGCAAGTAATG-1_3 Young Young_1
## AAACGGGTCGCTGATA-1_3 Young Young_1
## AAAGATGTCAGCACAT-1_3 Young Young_1
## AAAGCAAAGATGTGGC-1_3 Young Young_1
## AAAGCAAAGCCGCCTA-1_3 Young Young_1
## AAAGCAACAAAGGAAG-1_3 Young Young_1
Idents(cluster1)="Cond_Clus"
DimPlot(FAClus1)
DimPlot(FAClus1, split.by="Cond_Clus")
#FeaturePlot(FAClus1,"SOD3", split.by="condition")
FeaturePlot(FAClus1,"SOD3", cols=c("grey","orange","red"), label=T)
FeaturePlot(FAClus1,"SOD3", split.by="orig.ident", cols=c("grey","orange","red"))
head(FAClus1@active.assay)
## [1] "SCT"
Idents(FAClus1)="condition"
FAClus1_markers <- FindMarkers(FAClus1, ident.1 = "Young", ident.2 = "Old",logfc.threshold = 0.25)
head (FAClus1_markers)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## SOD3 0.000000e+00 1.0009360 0.841 0.452 0.000000e+00
## KLF6 1.714084e-250 -0.8197599 0.553 0.886 3.336122e-246
## TACC2 2.196916e-248 0.5859494 0.964 0.797 4.275858e-244
## SLPI 5.671500e-248 -0.6468030 1.000 1.000 1.103844e-243
## RPS4Y1 3.180983e-232 -1.2169557 0.000 0.403 6.191148e-228
## EIF2S2 2.974092e-214 0.6183710 0.909 0.674 5.788476e-210
cluster2=subset(objectI30,subset=seurat_clusters==2)
Idents(cluster2)="condition"
DEClus2_markers <- FindMarkers(cluster2, ident.1 = "Young", ident.2 = "Old",logfc.threshold = 0.25)
head (DEClus2_markers)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## SLPI 1.750918e-93 -0.6658101 0.979 0.997 3.407812e-89
## SOD3 1.059162e-76 0.8321142 0.738 0.441 2.061447e-72
## APOE 1.626007e-75 0.4141389 0.348 0.040 3.164697e-71
## DONSON 1.346086e-74 0.3822088 0.264 0.009 2.619887e-70
## EIF2S2 6.801560e-71 0.5761827 0.902 0.678 1.323788e-66
## RPS4Y1 2.233024e-67 -1.2145276 0.000 0.360 4.346135e-63
cluster3=subset(objectI30,subset=seurat_clusters==3)
Idents(cluster3)="condition"
DEClus3_markers <- FindMarkers(cluster3, ident.1 = "Young", ident.2 = "Old",logfc.threshold = 0.25)
head (DEClus3_markers)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## XIST 1.977756e-74 0.7735869 0.871 0.278 3.849307e-70
## RPS4Y1 1.460157e-69 -1.4780800 0.000 0.648 2.841905e-65
## DONSON 1.421639e-58 0.4356137 0.328 0.007 2.766936e-54
## KLF6 1.096396e-56 -0.9249761 0.481 0.905 2.133915e-52
## SCGB3A2 3.864849e-56 -0.8824029 0.944 0.968 7.522156e-52
## SLPI 4.950875e-54 -0.6422324 1.000 1.000 9.635887e-50
cluster4=subset(objectI30,subset=seurat_clusters==4)
Idents(cluster4)="condition"
DEClus4_markers <- FindMarkers(cluster4, ident.1 = "Young", ident.2 = "Old",logfc.threshold = 0.25)
head (DEClus4_markers)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## SOD3 1.495984e-78 1.1662374 0.885 0.446 2.911633e-74
## KLF6 1.248456e-71 -1.0327124 0.641 0.960 2.429870e-67
## HLA-DRB5 1.149570e-62 -0.6888812 0.188 0.754 2.237407e-58
## TACC2 1.472720e-56 0.6329141 0.981 0.827 2.866356e-52
## DONSON 2.377614e-56 0.5526940 0.450 0.010 4.627549e-52
## SLPI 4.637310e-50 -0.6552608 1.000 1.000 9.025596e-46
Idents(cluster1)="condition"
cluster.list=list(cluster1,cluster2,cluster3,cluster4)
VCL.list=list()
for (i in 1:length(cluster.list)) {
VCL.list[[i]] <- VlnPlot(cluster.list[[i]], features=c("SOD3","KLF6"), split.by="condition",
pt.size=0)
}
CombinePlots(VCL.list)
for (i in 1:length(cluster.list)) {
VCL.list[[i]] <- FeaturePlot(cluster.list[[i]], features=c("SOD3","KLF6"), cols=c("grey","red","black"),split.by="condition")
}
CombinePlots(VCL.list)
#DimPlot(Fibrotic)
#mergeClus0=merge(x = Donor, y = Fibrotic) # merging and assigning the object
#head(mergeClus0@meta.data)
#NOTE: How to make use of it?