#Load Seurat Object merged from cell lines and a control(PBMC) after filtration
SS_All_samples_Merged <- load("/home/bioinfo/Documents/1-SS-STeps/0-imp_OBJ_SS/All_Normal-PBMC_Abnormal-cellLines_T_cells_Merged_Annotated_UMAP_on_Clusters_to_USE.Robj")
All_samples_Merged
An object of class Seurat
62625 features across 46976 samples within 6 assays
Active assay: SCT (25901 features, 3000 variable features)
3 layers present: counts, data, scale.data
5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
4 dimensional reductions calculated: pca, umap, integrated_dr, ref.umap
Idents(All_samples_Merged) <- "cell_line"
VlnPlot(All_samples_Merged, features = c("nFeature_RNA",
"nCount_RNA",
"percent.mito"),
ncol = 3)
VlnPlot(All_samples_Merged, features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt",
"percent.rb"),
ncol = 4, pt.size = 0.1) &
theme(plot.title = element_text(size=10))
FeatureScatter(All_samples_Merged,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA") +
geom_smooth(method = 'lm')
##FeatureScatter is typically used to visualize feature-feature relationships ##for anything calculated by the object, ##i.e. columns in object metadata, PC scores etc.
FeatureScatter(All_samples_Merged,
feature1 = "nCount_RNA",
feature2 = "percent.mito")+
geom_smooth(method = 'lm')
FeatureScatter(All_samples_Merged,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")+
geom_smooth(method = 'lm')
# TEST-1
# given that the output of RunPCA is "pca"
# replace "so" by the name of your seurat object
pct <- All_samples_Merged[["pca"]]@stdev / sum(All_samples_Merged[["pca"]]@stdev) * 100
cumu <- cumsum(pct) # Calculate cumulative percents for each PC
# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which((pct[-length(pct)] - pct[-1]) > 0.1), decreasing = T)[1] + 1
# last point where change of % of variation is more than 0.1%. -> co2
co2
[1] 13
# TEST-2
# get significant PCs
stdv <- All_samples_Merged[["pca"]]@stdev
sum.stdv <- sum(All_samples_Merged[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co2 <- sort(which((percent.stdv[1:length(percent.stdv) - 1] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = T)[1] + 1
min.pc <- min(co1, co2)
min.pc
[1] 13
# Create a dataframe with values
plot_df <- data.frame(pct = percent.stdv,
cumu = cumulative,
rank = 1:length(percent.stdv))
# Elbow plot to visualize
ggplot(plot_df, aes(cumulative, percent.stdv, label = rank, color = rank > min.pc)) +
geom_text() +
geom_vline(xintercept = 90, color = "grey") +
geom_hline(yintercept = min(percent.stdv[percent.stdv > 5]), color = "grey") +
theme_bw()
NA
NA
NA
NA
NA
NA
NA
NA
NA
NA
NA
NA
NA
P1 <- DimPlot(object = All_samples_Merged, group.by = "cell_line", label = T, label.box = T) +
labs(title = 'Colored by cellline')
P1
P2 <- DimPlot(object = All_samples_Merged, group.by = "predicted.celltype.l2") +
labs(title = 'Colored by celltype')
P2
P3 <- DimPlot(object = All_samples_Merged, group.by = "cell_line", reduction = "pca") +
labs(title = 'Colored by cellline')
P3
P4 <- DimPlot(object = All_samples_Merged, group.by = "predicted.celltype.l2", reduction = "pca") +
labs(title = 'Colored by celltype')
P4
cowplot::plot_grid(P1, P2, P3, P4, nrow = 2)
cell_distribution_table <- table(All_samples_Merged$predicted.celltype.l2, All_samples_Merged$seurat_clusters)
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
write.csv(cell_distribution_df, file = "test2.csv", row.names = TRUE)
All_samples_Merged_integrated <- RunHarmony(All_samples_Merged, "cell_line")
Transposing data matrix
Initializing state using k-means centroids initialization
Harmony 1/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony 2/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony converged after 2 iterations
# Do UMAP and clustering using ** Harmony embeddings instead of PCA **
All_samples_Merged_integrated <- All_samples_Merged_integrated %>%
RunUMAP(reduction = 'harmony', dims = 1:13) %>%
FindNeighbors(reduction = "harmony", dims = 1:13) %>%
FindClusters(resolution = 0.5)
17:01:56 UMAP embedding parameters a = 0.9922 b = 1.112
17:01:56 Read 46976 rows and found 13 numeric columns
17:01:56 Using Annoy for neighbor search, n_neighbors = 30
17:01:56 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:01:59 Writing NN index file to temp file /tmp/RtmplSHSp6/file180302c135d88
17:01:59 Searching Annoy index using 1 thread, search_k = 3000
17:02:11 Annoy recall = 100%
17:02:12 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
17:02:14 Initializing from normalized Laplacian + noise (using RSpectra)
17:02:15 Commencing optimization for 200 epochs, with 1955114 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:02:30 Optimization finished
Computing nearest neighbor graph
Computing SNN
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 46976
Number of edges: 1404500
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8881
Number of communities: 12
Elapsed time: 11 seconds
DimPlot(object = All_samples_Merged_integrated, group.by = "predicted.celltype.l2", label = T, label.box = T, repel = T, reduction = "umap")
DimPlot(object = All_samples_Merged_integrated, group.by = "predicted.celltype.l2", label = T, label.box = T, repel = T, reduction = "harmony")
DimPlot(object = All_samples_Merged_integrated, group.by = "predicted.celltype.l2", label = T, label.box = T, repel = T, reduction = "integrated_dr")
DimPlot(object = All_samples_Merged_integrated, group.by = "predicted.celltype.l2", label = T, label.box = T, repel = T, reduction = "pca")
DimPlot(object = All_samples_Merged_integrated, group.by = "predicted.celltype.l2", label = T, label.box = T, repel = T, reduction = "ref.umap")
DimPlot(object = All_samples_Merged_integrated, group.by = "cell_line", label = T, label.box = T, repel = T, reduction = "umap")
DimPlot(object = All_samples_Merged_integrated, group.by = "seurat_clusters", label = T, label.box = T, repel = T, reduction = "umap")
cell_distribution_table <- table(All_samples_Merged_integrated$cell_line, All_samples_Merged_integrated$seurat_clusters)
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
write.csv(cell_distribution_df, file = "15-2-integration_table_HARMONY-TEST1.csv", row.names = TRUE)
cell_distribution_table <- table(All_samples_Merged_integrated$predicted.celltype.l2, All_samples_Merged_integrated$seurat_clusters)
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
write.csv(cell_distribution_df, file = "15-2-integration_table_HARMONY-TEST1_annotationbased.csv", row.names = TRUE)
cell_distribution_table <- table(All_samples_Merged_integrated$predicted.celltype.l1, All_samples_Merged_integrated$seurat_clusters)
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
write.csv(cell_distribution_df, file = "15-2-integration_table_HARMONY-TEST1_annotationbased_l1.csv", row.names = TRUE)
cell_distribution_table <- table(All_samples_Merged_integrated$predicted.celltype.l2, All_samples_Merged_integrated$cell_line)
cell_distribution_df <- as.data.frame.matrix(cell_distribution_table)
print(cell_distribution_df)
write.csv(cell_distribution_df, file = "15-2-integration_table_HARMONY-TEST1_annotationbased_cellline.csv", row.names = TRUE)
# save
#save(All_samples_Merged_integrated,file = "15-2_All_samples_Merged_integrated.Robj")
```