1. load libraries
2. Load Seurat Object
# load("AllSample_corrected_AzimuthAnnotated_L1.Robj")
# merged_seurat_filtered <- All_samples_Merged
merged_seurat_filtered
An object of class Seurat
36724 features across 49193 samples within 5 assays
Active assay: RNA (36601 features, 2000 variable features)
3 layers present: counts, data, scale.data
4 other assays present: ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
4 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap
3. Data PREPERATION
# perform standard workflow steps to figure out if we see any batch effects --------
# merged_seurat_filtered <- NormalizeData(object = merged_seurat_filtered, verbose = FALSE)
# merged_seurat_filtered <- FindVariableFeatures(object = merged_seurat_filtered , selection.method = "vst", nfeatures = 2000,verbose = FALSE)
#
# merged_seurat_filtered <- ScaleData(object = merged_seurat_filtered)
#
#
# Variable_genes <- merged_seurat_filtered@assays$RNA@var.features
#
# Variables_genes_after_exclusion <- Variable_genes[!grepl("^HLA-|^Xist", Variable_genes)]
#
# merged_seurat_filtered <- RunPCA(merged_seurat_filtered,
# features = Variables_genes_after_exclusion,
# do.print = TRUE,
# pcs.print = 1:5,
# genes.print = 15)
ElbowPlot(merged_seurat_filtered)

# merged_seurat_filtered <- FindNeighbors(object = merged_seurat_filtered, dims = 1:20, verbose = FALSE)
# merged_seurat_filtered <- FindClusters(object = merged_seurat_filtered, resolution = 1.2)
# merged_seurat_filtered <- RunUMAP(object = merged_seurat_filtered, dims = 1:20)
# plot
p1 <- DimPlot(merged_seurat_filtered, reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)
p2 <- DimPlot(merged_seurat_filtered, reduction = "umap", group.by = "RNA_snn_res.0.5", label = TRUE, label.box = TRUE, repel = TRUE)
grid.arrange(p1, p2, ncol = 2, nrow = 2)

DimPlot(merged_seurat_filtered, reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)

DimPlot(merged_seurat_filtered, reduction = "umap", group.by = "RNA_snn_res.0.5", label = TRUE, label.box = TRUE, repel = TRUE)

DimPlot(merged_seurat_filtered, reduction = "umap", group.by = "predicted.celltype.l2", label = TRUE, label.box = TRUE, repel = TRUE)

4. CCA-integration
# perform integration to correct for batch effects ------
# obj.list <- SplitObject(merged_seurat_filtered, split.by = "orig.ident")
# for(i in 1:length(obj.list)){
# obj.list[[i]] <- NormalizeData(object = obj.list[[i]], verbose = FALSE)
# obj.list[[i]] <- FindVariableFeatures(object = obj.list[[i]], selection.method = "vst", nfeatures = 2000,verbose = FALSE)
# }
# select integration features
# features <- SelectIntegrationFeatures(object.list = obj.list)
# find integration anchors (CCA)
# # anchors <- FindIntegrationAnchors(object.list = obj.list,
# anchor.features = features)
# integrate data
# seurat.integrated <- IntegrateData(anchorset = anchors)
# Scale data, run PCA and UMAP and visualize integrated data
# seurat.integrated <- ScaleData(object = seurat.integrated)
#
# seurat.integrated <- RunPCA(merged_seurat_filtered,
# features = Variables_genes_after_exclusion,
# do.print = TRUE,
# pcs.print = 1:5,
# genes.print = 15)
#
# seurat.integrated <- RunUMAP(object = seurat.integrated, dims = 1:50)
p3 <- DimPlot(seurat.integrated, reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)
p4 <- DimPlot(seurat.integrated,reduction = "umap", group.by = "RNA_snn_res.0.5", label = TRUE, label.box = TRUE, repel = TRUE)
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)

DimPlot(seurat.integrated, reduction = "umap", group.by = "cell_line", label = TRUE, label.box = TRUE, repel = TRUE)

DimPlot(seurat.integrated, reduction = "umap", group.by = "RNA_snn_res.0.5", label = TRUE, label.box = TRUE, repel = TRUE)

DimPlot(seurat.integrated, reduction = "umap", group.by = "predicted.celltype.l2", label = TRUE, label.box = TRUE, repel = TRUE)

FeaturePlot
myfeatures <- c("CD3E", "CD4", "CD8A", "NKG7", "GNLY", "MS4A1", "CD14", "LYZ", "MS4A7", "FCGR3A", "CST3", "FCER1A")
FeaturePlot(seurat.integrated, reduction = "umap", dims = 1:2, features = myfeatures, ncol = 4, order = T) + NoLegend() + NoAxes() + NoGrid()

6. Save the Seurat object as an Robj file
#save(alldata.int, file = "Integrated_by_rpca_Harmony.Robj")
LS0tCnRpdGxlOiAiSW50ZWdyYXRpb24gYnkgQ0NBX2J5X0siCmF1dGhvcjogTmFzaXIgTWFobW9vZCBBYmJhc2kKZGF0ZTogIjIwMjQtMDUtMTAiCm91dHB1dDoKICBodG1sX25vdGVib29rOiAKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICB0b2NfY29sbGFwc2VkOiB0cnVlCiAgICB0aGVtZTogZGFya2x5Ci0tLQojIDEuIGxvYWQgbGlicmFyaWVzCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQpsaWJyYXJ5KFNldXJhdCkKbGlicmFyeShTZXVyYXRPYmplY3QpCmxpYnJhcnkoU2V1cmF0RGF0YSkKbGlicmFyeShwYXRjaHdvcmspCmxpYnJhcnkoaGFybW9ueSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHJldGljdWxhdGUpCmxpYnJhcnkoQXppbXV0aCkKbGlicmFyeShkcGx5cikKbGlicmFyeShSdHNuZSkKbGlicmFyeShoYXJtb255KQpsaWJyYXJ5KGdyaWRFeHRyYSkKYGBgCgojIDIuIExvYWQgU2V1cmF0IE9iamVjdCAKYGBge3IgbG9hZF9zZXVyYXR9CgojIGxvYWQoIkFsbFNhbXBsZV9jb3JyZWN0ZWRfQXppbXV0aEFubm90YXRlZF9MMS5Sb2JqIikKCiMgbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCA8LSBBbGxfc2FtcGxlc19NZXJnZWQKCm1lcmdlZF9zZXVyYXRfZmlsdGVyZWQKCmBgYAoKCiMgMy4gRGF0YSBQUkVQRVJBVElPTgpgYGB7ciBkYXRhLCBmaWcuaGVpZ2h0PTYsIGZpZy53aWR0aD0xMH0KCiMgcGVyZm9ybSBzdGFuZGFyZCB3b3JrZmxvdyBzdGVwcyB0byBmaWd1cmUgb3V0IGlmIHdlIHNlZSBhbnkgYmF0Y2ggZWZmZWN0cyAtLS0tLS0tLQojIG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQgPC0gTm9ybWFsaXplRGF0YShvYmplY3QgPSBtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkLCB2ZXJib3NlID0gRkFMU0UpCiMgbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCA8LSBGaW5kVmFyaWFibGVGZWF0dXJlcyhvYmplY3QgPSBtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkICwgc2VsZWN0aW9uLm1ldGhvZCA9ICJ2c3QiLCBuZmVhdHVyZXMgPSAyMDAwLHZlcmJvc2UgPSBGQUxTRSkKIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAojIG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQgPC0gU2NhbGVEYXRhKG9iamVjdCA9IG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQpCiMgCiMgCiMgVmFyaWFibGVfZ2VuZXMgPC0gbWVyZ2VkX3NldXJhdF9maWx0ZXJlZEBhc3NheXMkUk5BQHZhci5mZWF0dXJlcwojIAojIFZhcmlhYmxlc19nZW5lc19hZnRlcl9leGNsdXNpb24gPC0gVmFyaWFibGVfZ2VuZXNbIWdyZXBsKCJeSExBLXxeWGlzdCIsIFZhcmlhYmxlX2dlbmVzKV0KIyAgIAojIG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQgPC0gUnVuUENBKG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQsCiMgICAgICAgICAgICAgICAgICAgICAgICAgIGZlYXR1cmVzID0gVmFyaWFibGVzX2dlbmVzX2FmdGVyX2V4Y2x1c2lvbiwKIyAgICAgICAgICAgICAgICAgICAgICAgICAgZG8ucHJpbnQgPSBUUlVFLCAKIyAgICAgICAgICAgICAgICAgICAgICAgICAgcGNzLnByaW50ID0gMTo1LCAKIyAgICAgICAgICAgICAgICAgICAgICAgICAgZ2VuZXMucHJpbnQgPSAxNSkKCkVsYm93UGxvdChtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkKQoKCiMgbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCA8LSBGaW5kTmVpZ2hib3JzKG9iamVjdCA9IG1lcmdlZF9zZXVyYXRfZmlsdGVyZWQsIGRpbXMgPSAxOjIwLCB2ZXJib3NlID0gRkFMU0UpCiMgbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCA8LSBGaW5kQ2x1c3RlcnMob2JqZWN0ID0gbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwgcmVzb2x1dGlvbiA9IDEuMikKIyBtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkIDwtIFJ1blVNQVAob2JqZWN0ID0gbWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwgZGltcyA9IDE6MjApCgoKIyBwbG90CnAxIDwtIERpbVBsb3QobWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwgcmVkdWN0aW9uID0gInVtYXAiLCBncm91cC5ieSA9ICJjZWxsX2xpbmUiLCBsYWJlbCA9IFRSVUUsIGxhYmVsLmJveCA9IFRSVUUsIHJlcGVsID0gVFJVRSkKcDIgPC0gRGltUGxvdChtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkLCByZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gIlJOQV9zbm5fcmVzLjAuNSIsIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSwgcmVwZWwgPSBUUlVFKQoKZ3JpZC5hcnJhbmdlKHAxLCBwMiwgbmNvbCA9IDIsIG5yb3cgPSAyKQoKCkRpbVBsb3QobWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwgcmVkdWN0aW9uID0gInVtYXAiLCBncm91cC5ieSA9ICJjZWxsX2xpbmUiLCBsYWJlbCA9IFRSVUUsIGxhYmVsLmJveCA9IFRSVUUsIHJlcGVsID0gVFJVRSkKCkRpbVBsb3QobWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwgcmVkdWN0aW9uID0gInVtYXAiLCBncm91cC5ieSA9ICJSTkFfc25uX3Jlcy4wLjUiLCBsYWJlbCA9IFRSVUUsIGxhYmVsLmJveCA9IFRSVUUsIHJlcGVsID0gVFJVRSkKRGltUGxvdChtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkLCByZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gInByZWRpY3RlZC5jZWxsdHlwZS5sMiIsIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSwgcmVwZWwgPSBUUlVFKQoKYGBgCgoKIyA0LiBDQ0EtaW50ZWdyYXRpb24KYGBge3IgaW50ZWdyYXRpb24tcnBjYSwgZmlnLmhlaWdodD02LCBmaWcud2lkdGg9MTB9CgojIHBlcmZvcm0gaW50ZWdyYXRpb24gdG8gY29ycmVjdCBmb3IgYmF0Y2ggZWZmZWN0cyAtLS0tLS0KIyBvYmoubGlzdCA8LSBTcGxpdE9iamVjdChtZXJnZWRfc2V1cmF0X2ZpbHRlcmVkLCBzcGxpdC5ieSA9ICJvcmlnLmlkZW50IikKIyBmb3IoaSBpbiAxOmxlbmd0aChvYmoubGlzdCkpewojICAgb2JqLmxpc3RbW2ldXSA8LSBOb3JtYWxpemVEYXRhKG9iamVjdCA9IG9iai5saXN0W1tpXV0sIHZlcmJvc2UgPSBGQUxTRSkKIyAgIG9iai5saXN0W1tpXV0gPC0gRmluZFZhcmlhYmxlRmVhdHVyZXMob2JqZWN0ID0gb2JqLmxpc3RbW2ldXSwgc2VsZWN0aW9uLm1ldGhvZCA9ICJ2c3QiLCBuZmVhdHVyZXMgPSAyMDAwLHZlcmJvc2UgPSBGQUxTRSkKIyB9CgoKCiMgc2VsZWN0IGludGVncmF0aW9uIGZlYXR1cmVzCiMgZmVhdHVyZXMgPC0gU2VsZWN0SW50ZWdyYXRpb25GZWF0dXJlcyhvYmplY3QubGlzdCA9IG9iai5saXN0KQoKCgojIGZpbmQgaW50ZWdyYXRpb24gYW5jaG9ycyAoQ0NBKQojICMgYW5jaG9ycyA8LSBGaW5kSW50ZWdyYXRpb25BbmNob3JzKG9iamVjdC5saXN0ID0gb2JqLmxpc3QsCiMgICAgICAgICAgICAgICAgICAgICAgICBhbmNob3IuZmVhdHVyZXMgPSBmZWF0dXJlcykKCiMgaW50ZWdyYXRlIGRhdGEKIyBzZXVyYXQuaW50ZWdyYXRlZCA8LSBJbnRlZ3JhdGVEYXRhKGFuY2hvcnNldCA9IGFuY2hvcnMpCgoKIyBTY2FsZSBkYXRhLCBydW4gUENBIGFuZCBVTUFQIGFuZCB2aXN1YWxpemUgaW50ZWdyYXRlZCBkYXRhCiMgc2V1cmF0LmludGVncmF0ZWQgPC0gU2NhbGVEYXRhKG9iamVjdCA9IHNldXJhdC5pbnRlZ3JhdGVkKQojIAojIHNldXJhdC5pbnRlZ3JhdGVkIDwtICBSdW5QQ0EobWVyZ2VkX3NldXJhdF9maWx0ZXJlZCwKIyAgICAgICAgICAgICAgICAgICAgICAgICAgZmVhdHVyZXMgPSBWYXJpYWJsZXNfZ2VuZXNfYWZ0ZXJfZXhjbHVzaW9uLAojICAgICAgICAgICAgICAgICAgICAgICAgICBkby5wcmludCA9IFRSVUUsIAojICAgICAgICAgICAgICAgICAgICAgICAgICBwY3MucHJpbnQgPSAxOjUsIAojICAgICAgICAgICAgICAgICAgICAgICAgICBnZW5lcy5wcmludCA9IDE1KQojIAojIHNldXJhdC5pbnRlZ3JhdGVkIDwtIFJ1blVNQVAob2JqZWN0ID0gc2V1cmF0LmludGVncmF0ZWQsIGRpbXMgPSAxOjUwKQoKCnAzIDwtIERpbVBsb3Qoc2V1cmF0LmludGVncmF0ZWQsIHJlZHVjdGlvbiA9ICJ1bWFwIiwgZ3JvdXAuYnkgPSAiY2VsbF9saW5lIiwgbGFiZWwgPSBUUlVFLCBsYWJlbC5ib3ggPSBUUlVFLCByZXBlbCA9IFRSVUUpCnA0IDwtIERpbVBsb3Qoc2V1cmF0LmludGVncmF0ZWQscmVkdWN0aW9uID0gInVtYXAiLCBncm91cC5ieSA9ICJSTkFfc25uX3Jlcy4wLjUiLCBsYWJlbCA9IFRSVUUsIGxhYmVsLmJveCA9IFRSVUUsIHJlcGVsID0gVFJVRSkKCgpncmlkLmFycmFuZ2UocDEsIHAyLCBwMywgcDQsIG5jb2wgPSAyLCBucm93ID0gMikKCkRpbVBsb3Qoc2V1cmF0LmludGVncmF0ZWQsIHJlZHVjdGlvbiA9ICJ1bWFwIiwgZ3JvdXAuYnkgPSAiY2VsbF9saW5lIiwgbGFiZWwgPSBUUlVFLCBsYWJlbC5ib3ggPSBUUlVFLCByZXBlbCA9IFRSVUUpCgpEaW1QbG90KHNldXJhdC5pbnRlZ3JhdGVkLCByZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gIlJOQV9zbm5fcmVzLjAuNSIsIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSwgcmVwZWwgPSBUUlVFKQpEaW1QbG90KHNldXJhdC5pbnRlZ3JhdGVkLCByZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gInByZWRpY3RlZC5jZWxsdHlwZS5sMiIsIGxhYmVsID0gVFJVRSwgbGFiZWwuYm94ID0gVFJVRSwgcmVwZWwgPSBUUlVFKQoKYGBgCgoKIyBGZWF0dXJlUGxvdApgYGB7ciBmZWF0dXJlcGxvdC1oYXJtb255LCBmaWcuaGVpZ2h0PTYsIGZpZy53aWR0aD0xMH0KCgpteWZlYXR1cmVzIDwtIGMoIkNEM0UiLCAiQ0Q0IiwgIkNEOEEiLCAiTktHNyIsICJHTkxZIiwgIk1TNEExIiwgIkNEMTQiLCAiTFlaIiwgIk1TNEE3IiwgIkZDR1IzQSIsICJDU1QzIiwgIkZDRVIxQSIpCgpGZWF0dXJlUGxvdChzZXVyYXQuaW50ZWdyYXRlZCwgcmVkdWN0aW9uID0gInVtYXAiLCBkaW1zID0gMToyLCBmZWF0dXJlcyA9IG15ZmVhdHVyZXMsIG5jb2wgPSA0LCBvcmRlciA9IFQpICsgTm9MZWdlbmQoKSArIE5vQXhlcygpICsgTm9HcmlkKCkKCmBgYAoKCiMgNi4gU2F2ZSB0aGUgU2V1cmF0IG9iamVjdCBhcyBhbiBSb2JqIGZpbGUKYGBge3Igc2F2ZVJPQkp9Cgojc2F2ZShhbGxkYXRhLmludCwgZmlsZSA9ICJJbnRlZ3JhdGVkX2J5X3JwY2FfSGFybW9ueS5Sb2JqIikKCgpgYGAKCgoKCg==