1. load libraries

2. Load Seurat Object


# load("AllSample_corrected_AzimuthAnnotated_L1.Robj")

# merged_seurat_filtered <- All_samples_Merged

merged_seurat_filtered
An object of class Seurat 
36724 features across 49193 samples within 5 assays 
Active assay: RNA (36601 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 4 other assays present: ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
 4 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap

3. Data PREPERATION


# perform standard workflow steps to figure out if we see any batch effects --------
# merged_seurat_filtered <- NormalizeData(object = merged_seurat_filtered, verbose = FALSE)
# merged_seurat_filtered <- FindVariableFeatures(object = merged_seurat_filtered , selection.method = "vst", nfeatures = 2000,verbose = FALSE)
#                                                
# merged_seurat_filtered <- ScaleData(object = merged_seurat_filtered)
# 
# 
# Variable_genes <- merged_seurat_filtered@assays$RNA@var.features
# 
# Variables_genes_after_exclusion <- Variable_genes[!grepl("^HLA-|^Xist", Variable_genes)]
#   
# merged_seurat_filtered <- RunPCA(merged_seurat_filtered,
#                          features = Variables_genes_after_exclusion,
#                          do.print = TRUE, 
#                          pcs.print = 1:5, 
#                          genes.print = 15)

ElbowPlot(merged_seurat_filtered)



# merged_seurat_filtered <- FindNeighbors(object = merged_seurat_filtered, dims = 1:20, verbose = FALSE)
# merged_seurat_filtered <- FindClusters(object = merged_seurat_filtered, resolution = 1.2)
# merged_seurat_filtered <- RunUMAP(object = merged_seurat_filtered, dims = 1:20)


# plot
p1 <- DimPlot(merged_seurat_filtered, reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)
p2 <- DimPlot(merged_seurat_filtered, reduction = "umap", group.by = "RNA_snn_res.0.5", label = TRUE, label.box = TRUE, repel = TRUE)

grid.arrange(p1, p2, ncol = 2, nrow = 2)



DimPlot(merged_seurat_filtered, reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)


DimPlot(merged_seurat_filtered, reduction = "umap", group.by = "RNA_snn_res.0.5", label = TRUE, label.box = TRUE, repel = TRUE)

DimPlot(merged_seurat_filtered, reduction = "umap", group.by = "predicted.celltype.l2", label = TRUE, label.box = TRUE, repel = TRUE)

4. CCA-integration


# perform integration to correct for batch effects ------
# obj.list <- SplitObject(merged_seurat_filtered, split.by = "orig.ident")
# for(i in 1:length(obj.list)){
#   obj.list[[i]] <- NormalizeData(object = obj.list[[i]], verbose = FALSE)
#   obj.list[[i]] <- FindVariableFeatures(object = obj.list[[i]], selection.method = "vst", nfeatures = 2000,verbose = FALSE)
# }



# select integration features
# features <- SelectIntegrationFeatures(object.list = obj.list)



# find integration anchors (CCA)
# # anchors <- FindIntegrationAnchors(object.list = obj.list,
#                        anchor.features = features)

# integrate data
# seurat.integrated <- IntegrateData(anchorset = anchors)


# Scale data, run PCA and UMAP and visualize integrated data
# seurat.integrated <- ScaleData(object = seurat.integrated)
# 
# seurat.integrated <-  RunPCA(merged_seurat_filtered,
#                          features = Variables_genes_after_exclusion,
#                          do.print = TRUE, 
#                          pcs.print = 1:5, 
#                          genes.print = 15)
# 
 seurat.integrated <- RunUMAP(object = seurat.integrated, dims = 1:12)
16:44:49 UMAP embedding parameters a = 0.9922 b = 1.112
16:44:49 Read 49193 rows and found 12 numeric columns
16:44:49 Using Annoy for neighbor search, n_neighbors = 30
16:44:49 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
16:44:52 Writing NN index file to temp file /tmp/Rtmpo9cMvG/filec773d7ed006a3
16:44:52 Searching Annoy index using 1 thread, search_k = 3000
16:45:03 Annoy recall = 100%
16:45:04 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
16:45:06 Initializing from normalized Laplacian + noise (using RSpectra)
16:46:29 Commencing optimization for 200 epochs, with 2051184 positive edges
Using method 'umap'
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
16:46:45 Optimization finished
p3 <- DimPlot(seurat.integrated, reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)
p4 <- DimPlot(seurat.integrated,reduction = "umap", group.by = "RNA_snn_res.0.5", label = TRUE, label.box = TRUE, repel = TRUE)


grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)


DimPlot(seurat.integrated, reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)


DimPlot(seurat.integrated, reduction = "umap", group.by = "RNA_snn_res.0.5", label = TRUE, label.box = TRUE, repel = TRUE)

DimPlot(seurat.integrated, reduction = "umap", group.by = "predicted.celltype.l2", label = TRUE, label.box = TRUE, repel = TRUE)

FeaturePlot



myfeatures <- c("CD3E", "CD4", "CD8A", "NKG7", "GNLY", "MS4A1", "CD14", "LYZ", "MS4A7", "FCGR3A", "CST3", "FCER1A")

FeaturePlot(seurat.integrated, reduction = "umap", dims = 1:2, features = myfeatures, ncol = 4, order = T) + NoLegend() + NoAxes() + NoGrid()

6. Save the Seurat object as an Robj file


save(seurat.integrated, file = "seurat.integrated_K_code_CCA.Robj")
LS0tCnRpdGxlOiAiSW50ZWdyYXRpb24gYnkgQ0NBX2J5X0siCmF1dGhvcjogTmFzaXIgTWFobW9vZCBBYmJhc2kKZGF0ZTogIjIwMjQtMDUtMTAiCm91dHB1dDoKICBodG1sX25vdGVib29rOiAKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICB0b2NfY29sbGFwc2VkOiB0cnVlCiAgICB0aGVtZTogZGFya2x5Ci0tLQojIDEuIGxvYWQgbGlicmFyaWVzCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQpsaWJyYXJ5KFNldXJhdCkKbGlicmFyeShTZXVyYXRPYmplY3QpCmxpYnJhcnkoU2V1cmF0RGF0YSkKbGlicmFyeShwYXRjaHdvcmspCmxpYnJhcnkoaGFybW9ueSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHJldGljdWxhdGUpCmxpYnJhcnkoQXppbXV0aCkKbGlicmFyeShkcGx5cikKbGlicmFyeShSdHNuZSkKbGlicmFyeShoYXJtb255KQpsaWJyYXJ5KGdyaWRFeHRyYSkKYGBgCgojIDIuIExvYWQgU2V1cmF0IE9iamVjdCAKYGBge3IgbG9hZF9zZXVyYXR9CgojIGxvYWQoIkFsbFNhbXBsZV9jb3JyZWN0ZWRfQXppbXV0aEFubm90YXRlZF9MMS5Sb2JqIikKCiMgbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCA8LSBBbGxfc2FtcGxlc19NZXJnZWQKCm1lcmdlZF9zZXVyYXRfZmlsdGVyZWQKCmBgYAoKCiMgMy4gRGF0YSBQUkVQRVJBVElPTgpgYGB7ciBkYXRhLCBmaWcuaGVpZ2h0PTYsIGZpZy53aWR0aD0xMH0KCiMgcGVyZm9ybSBzdGFuZGFyZCB3b3JrZmxvdyBzdGVwcyB0byBmaWd1cmUgb3V0IGlmIHdlIHNlZSBhbnkgYmF0Y2ggZWZmZWN0cyAtLS0tLS0tLQojIG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQgPC0gTm9ybWFsaXplRGF0YShvYmplY3QgPSBtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkLCB2ZXJib3NlID0gRkFMU0UpCiMgbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCA8LSBGaW5kVmFyaWFibGVGZWF0dXJlcyhvYmplY3QgPSBtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkICwgc2VsZWN0aW9uLm1ldGhvZCA9ICJ2c3QiLCBuZmVhdHVyZXMgPSAyMDAwLHZlcmJvc2UgPSBGQUxTRSkKIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAojIG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQgPC0gU2NhbGVEYXRhKG9iamVjdCA9IG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQpCiMgCiMgCiMgVmFyaWFibGVfZ2VuZXMgPC0gbWVyZ2VkX3NldXJhdF9maWx0ZXJlZEBhc3NheXMkUk5BQHZhci5mZWF0dXJlcwojIAojIFZhcmlhYmxlc19nZW5lc19hZnRlcl9leGNsdXNpb24gPC0gVmFyaWFibGVfZ2VuZXNbIWdyZXBsKCJeSExBLXxeWGlzdCIsIFZhcmlhYmxlX2dlbmVzKV0KIyAgIAojIG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQgPC0gUnVuUENBKG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQsCiMgICAgICAgICAgICAgICAgICAgICAgICAgIGZlYXR1cmVzID0gVmFyaWFibGVzX2dlbmVzX2FmdGVyX2V4Y2x1c2lvbiwKIyAgICAgICAgICAgICAgICAgICAgICAgICAgZG8ucHJpbnQgPSBUUlVFLCAKIyAgICAgICAgICAgICAgICAgICAgICAgICAgcGNzLnByaW50ID0gMTo1LCAKIyAgICAgICAgICAgICAgICAgICAgICAgICAgZ2VuZXMucHJpbnQgPSAxNSkKCkVsYm93UGxvdChtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkKQoKCiMgbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCA8LSBGaW5kTmVpZ2hib3JzKG9iamVjdCA9IG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQsIGRpbXMgPSAxOjIwLCB2ZXJib3NlID0gRkFMU0UpCiMgbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCA8LSBGaW5kQ2x1c3RlcnMob2JqZWN0ID0gbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwgcmVzb2x1dGlvbiA9IDEuMikKIyBtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkIDwtIFJ1blVNQVAob2JqZWN0ID0gbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwgZGltcyA9IDE6MjApCgoKIyBwbG90CnAxIDwtIERpbVBsb3QobWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwgcmVkdWN0aW9uID0gInVtYXAiLCBncm91cC5ieSA9ICJjZWxsX2xpbmUiLCBsYWJlbCA9IFRSVUUsIGxhYmVsLmJveCA9IFRSVUUsIHJlcGVsID0gVFJVRSkKcDIgPC0gRGltUGxvdChtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkLCByZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gIlJOQV9zbm5fcmVzLjAuNSIsIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSwgcmVwZWwgPSBUUlVFKQoKZ3JpZC5hcnJhbmdlKHAxLCBwMiwgbmNvbCA9IDIsIG5yb3cgPSAyKQoKCkRpbVBsb3QobWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwgcmVkdWN0aW9uID0gInVtYXAiLCBncm91cC5ieSA9ICJjZWxsX2xpbmUiLCBsYWJlbCA9IFRSVUUsIGxhYmVsLmJveCA9IFRSVUUsIHJlcGVsID0gVFJVRSkKCkRpbVBsb3QobWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwgcmVkdWN0aW9uID0gInVtYXAiLCBncm91cC5ieSA9ICJSTkFfc25uX3Jlcy4wLjUiLCBsYWJlbCA9IFRSVUUsIGxhYmVsLmJveCA9IFRSVUUsIHJlcGVsID0gVFJVRSkKRGltUGxvdChtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkLCByZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gInByZWRpY3RlZC5jZWxsdHlwZS5sMiIsIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSwgcmVwZWwgPSBUUlVFKQoKYGBgCgoKIyA0LiBDQ0EtaW50ZWdyYXRpb24KYGBge3IgaW50ZWdyYXRpb24tcnBjYSwgZmlnLmhlaWdodD02LCBmaWcud2lkdGg9MTB9CgojIHBlcmZvcm0gaW50ZWdyYXRpb24gdG8gY29ycmVjdCBmb3IgYmF0Y2ggZWZmZWN0cyAtLS0tLS0KIyBvYmoubGlzdCA8LSBTcGxpdE9iamVjdChtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkLCBzcGxpdC5ieSA9ICJvcmlnLmlkZW50IikKIyBmb3IoaSBpbiAxOmxlbmd0aChvYmoubGlzdCkpewojICAgb2JqLmxpc3RbW2ldXSA8LSBOb3JtYWxpemVEYXRhKG9iamVjdCA9IG9iai5saXN0W1tpXV0sIHZlcmJvc2UgPSBGQUxTRSkKIyAgIG9iai5saXN0W1tpXV0gPC0gRmluZFZhcmlhYmxlRmVhdHVyZXMob2JqZWN0ID0gb2JqLmxpc3RbW2ldXSwgc2VsZWN0aW9uLm1ldGhvZCA9ICJ2c3QiLCBuZmVhdHVyZXMgPSAyMDAwLHZlcmJvc2UgPSBGQUxTRSkKIyB9CgoKCiMgc2VsZWN0IGludGVncmF0aW9uIGZlYXR1cmVzCiMgZmVhdHVyZXMgPC0gU2VsZWN0SW50ZWdyYXRpb25GZWF0dXJlcyhvYmplY3QubGlzdCA9IG9iai5saXN0KQoKCgojIGZpbmQgaW50ZWdyYXRpb24gYW5jaG9ycyAoQ0NBKQojICMgYW5jaG9ycyA8LSBGaW5kSW50ZWdyYXRpb25BbmNob3JzKG9iamVjdC5saXN0ID0gb2JqLmxpc3QsCiMgICAgICAgICAgICAgICAgICAgICAgICBhbmNob3IuZmVhdHVyZXMgPSBmZWF0dXJlcykKCiMgaW50ZWdyYXRlIGRhdGEKIyBzZXVyYXQuaW50ZWdyYXRlZCA8LSBJbnRlZ3JhdGVEYXRhKGFuY2hvcnNldCA9IGFuY2hvcnMpCgoKIyBTY2FsZSBkYXRhLCBydW4gUENBIGFuZCBVTUFQIGFuZCB2aXN1YWxpemUgaW50ZWdyYXRlZCBkYXRhCiMgc2V1cmF0LmludGVncmF0ZWQgPC0gU2NhbGVEYXRhKG9iamVjdCA9IHNldXJhdC5pbnRlZ3JhdGVkKQojIAojIHNldXJhdC5pbnRlZ3JhdGVkIDwtICBSdW5QQ0EobWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwKIyAgICAgICAgICAgICAgICAgICAgICAgICAgZmVhdHVyZXMgPSBWYXJpYWJsZXNfZ2VuZXNfYWZ0ZXJfZXhjbHVzaW9uLAojICAgICAgICAgICAgICAgICAgICAgICAgICBkby5wcmludCA9IFRSVUUsIAojICAgICAgICAgICAgICAgICAgICAgICAgICBwY3MucHJpbnQgPSAxOjUsIAojICAgICAgICAgICAgICAgICAgICAgICAgICBnZW5lcy5wcmludCA9IDE1KQojIAogc2V1cmF0LmludGVncmF0ZWQgPC0gUnVuVU1BUChvYmplY3QgPSBzZXVyYXQuaW50ZWdyYXRlZCwgZGltcyA9IDE6MTIpCgoKcDMgPC0gRGltUGxvdChzZXVyYXQuaW50ZWdyYXRlZCwgcmVkdWN0aW9uID0gInVtYXAiLCBncm91cC5ieSA9ICJjZWxsX2xpbmUiLCBsYWJlbCA9IFRSVUUsIGxhYmVsLmJveCA9IFRSVUUsIHJlcGVsID0gVFJVRSkKcDQgPC0gRGltUGxvdChzZXVyYXQuaW50ZWdyYXRlZCxyZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gIlJOQV9zbm5fcmVzLjAuNSIsIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSwgcmVwZWwgPSBUUlVFKQoKCmdyaWQuYXJyYW5nZShwMSwgcDIsIHAzLCBwNCwgbmNvbCA9IDIsIG5yb3cgPSAyKQoKRGltUGxvdChzZXVyYXQuaW50ZWdyYXRlZCwgcmVkdWN0aW9uID0gInVtYXAiLCBncm91cC5ieSA9ICJjZWxsX2xpbmUiLCBsYWJlbCA9IFRSVUUsIGxhYmVsLmJveCA9IFRSVUUsIHJlcGVsID0gVFJVRSkKCkRpbVBsb3Qoc2V1cmF0LmludGVncmF0ZWQsIHJlZHVjdGlvbiA9ICJ1bWFwIiwgZ3JvdXAuYnkgPSAiUk5BX3Nubl9yZXMuMC41IiwgbGFiZWwgPSBUUlVFLCBsYWJlbC5ib3ggPSBUUlVFLCByZXBlbCA9IFRSVUUpCkRpbVBsb3Qoc2V1cmF0LmludGVncmF0ZWQsIHJlZHVjdGlvbiA9ICJ1bWFwIiwgZ3JvdXAuYnkgPSAicHJlZGljdGVkLmNlbGx0eXBlLmwyIiwgbGFiZWwgPSBUUlVFLCBsYWJlbC5ib3ggPSBUUlVFLCByZXBlbCA9IFRSVUUpCgpgYGAKCgojIEZlYXR1cmVQbG90CmBgYHtyIGZlYXR1cmVwbG90LWhhcm1vbnksIGZpZy5oZWlnaHQ9NiwgZmlnLndpZHRoPTEwfQoKCm15ZmVhdHVyZXMgPC0gYygiQ0QzRSIsICJDRDQiLCAiQ0Q4QSIsICJOS0c3IiwgIkdOTFkiLCAiTVM0QTEiLCAiQ0QxNCIsICJMWVoiLCAiTVM0QTciLCAiRkNHUjNBIiwgIkNTVDMiLCAiRkNFUjFBIikKCkZlYXR1cmVQbG90KHNldXJhdC5pbnRlZ3JhdGVkLCByZWR1Y3Rpb24gPSAidW1hcCIsIGRpbXMgPSAxOjIsIGZlYXR1cmVzID0gbXlmZWF0dXJlcywgbmNvbCA9IDQsIG9yZGVyID0gVCkgKyBOb0xlZ2VuZCgpICsgTm9BeGVzKCkgKyBOb0dyaWQoKQoKYGBgCgoKIyA2LiBTYXZlIHRoZSBTZXVyYXQgb2JqZWN0IGFzIGFuIFJvYmogZmlsZQpgYGB7ciBzYXZlUk9CSn0KCnNhdmUoc2V1cmF0LmludGVncmF0ZWQsIGZpbGUgPSAic2V1cmF0LmludGVncmF0ZWRfS19jb2RlX0NDQS5Sb2JqIikKCgpgYGAKCgoKCg==