1. load libraries

library(Seurat)
Loading required package: SeuratObject
Loading required package: sp

Attaching package: 'SeuratObject'
The following objects are masked from 'package:base':

    intersect, t
library(SeuratObject)
library(SeuratData)
── Installed datasets ──────────────────────────────── SeuratData v0.2.2.9001 ──
✔ pbmcref 1.0.0                         ✔ pbmcsca 3.0.0
────────────────────────────────────── Key ─────────────────────────────────────
✔ Dataset loaded successfully
❯ Dataset built with a newer version of Seurat than installed
❓ Unknown version of Seurat installed
library(patchwork)
library(Azimuth)
Registered S3 method overwritten by 'SeuratDisk':
  method            from  
  as.sparse.H5Group Seurat
Attaching shinyBS
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
✔ readr     2.1.5     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rmarkdown)
library(tinytex)


library(dplyr)
library(dittoSeq)
library(ggrepel)
#library(ggtree)
library(parallel)
library(plotly)  # 3D plot

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
library(Seurat)  # Idents()
library(SeuratDisk)  # SaveH5Seurat()

Attaching package: 'SeuratDisk'

The following object is masked from 'package:Azimuth':

    Connect
library(tibble)  # rownnames_to_column
library(harmony) # RunHarmony()
Loading required package: Rcpp
#options(mc.cores = detectCores() - 1)

2. Load Seurat Object

#Load Seurat Object merged from cell lines and a control(PBMC) after filtration
SS_All_samples_Merged <- load("0-OBJ/CD4Tcells_no_B_cells_in_L4_SCT_Ready_for_integration.robj")

All_samples_Merged
An object of class Seurat 
62931 features across 49388 samples within 6 assays 
Active assay: SCT (26179 features, 3000 variable features)
 3 layers present: counts, data, scale.data
 5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
 4 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap

3. Data PREPARATION

options(future.globals.maxSize = 1024 * 1024 * 1024)  # 1 GB

# Data Preparation for Seurat v5
alldata <- All_samples_Merged



# Split the object by 'orig.ident' for individual dataset processing
alldata.list <- SplitObject(alldata, split.by = "orig.ident")

# Normalize and identify variable features for each dataset in the list
alldata.list <- lapply(X = alldata.list, FUN = function(x) {
    x <- SCTransform(x, verbose = F)
    })
Warning: Different cells and/or features from existing assay SCT
Different cells and/or features from existing assay SCT
Different cells and/or features from existing assay SCT
Different cells and/or features from existing assay SCT
Different cells and/or features from existing assay SCT
Different cells and/or features from existing assay SCT
Different cells and/or features from existing assay SCT
Different cells and/or features from existing assay SCT
Different cells and/or features from existing assay SCT
# Select integration features across datasets
hvgs_all <- SelectIntegrationFeatures(alldata.list, nfeatures = 3000)

# Scale and PCA on each dataset using selected integration features
alldata.list <- lapply(alldata.list, function(x) {
    x <- RunPCA(x, features = hvgs_all, verbose = FALSE)
})

4. rpca-integration

alldata.list <- PrepSCTIntegration(alldata.list, anchor.features = hvgs_all)

alldata.anchors <- FindIntegrationAnchors(object.list = alldata.list, anchor.features = hvgs_all, reduction =  "rpca", normalization.method = "SCT")
Computing within dataset neighborhoods
Finding all pairwise anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1794 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1172 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1919 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 864 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1419 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2468 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1082 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1824 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2446 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2977 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1192 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1579 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2551 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1739 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2608 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 977 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1707 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2052 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2733 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 3686 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2933 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 656 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 650 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 628 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 555 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 602 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 810 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 631 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 517 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 433 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 409 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 334 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 424 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 472 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 381 anchors
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1247 anchors
alldata.int <- IntegrateData(anchorset = alldata.anchors, normalization.method = "SCT")
[1] 1
Warning: Different cells and/or features from existing assay SCT
Warning: Layer counts isn't present in the assay object; returning NULL
[1] 2
Warning: Different cells and/or features from existing assay SCT
Layer counts isn't present in the assay object; returning NULL
[1] 3
Warning: Different cells and/or features from existing assay SCT
Layer counts isn't present in the assay object; returning NULL
[1] 4
Warning: Different cells and/or features from existing assay SCT
Layer counts isn't present in the assay object; returning NULL
[1] 5
Warning: Different cells and/or features from existing assay SCT
Layer counts isn't present in the assay object; returning NULL
[1] 6
Warning: Different cells and/or features from existing assay SCT
Layer counts isn't present in the assay object; returning NULL
[1] 7
Warning: Different cells and/or features from existing assay SCT
Layer counts isn't present in the assay object; returning NULL
[1] 8
Warning: Different cells and/or features from existing assay SCT
Layer counts isn't present in the assay object; returning NULL
[1] 9
Warning: Different cells and/or features from existing assay SCT
Layer counts isn't present in the assay object; returning NULL
Merging dataset 7 into 5
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULL
Merging dataset 6 into 5 7
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULL
Merging dataset 4 into 3
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULL
Merging dataset 9 into 8
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULL
Merging dataset 3 4 into 5 7 6
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULL
Merging dataset 1 into 2
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULL
Merging dataset 2 1 into 5 7 6 3 4
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULL
Merging dataset 8 9 into 5 7 6 3 4 2 1
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULL
Warning in asMethod(object): sparse->dense coercion: allocating vector of size
1.1 GiB
Warning: Assay integrated changing from Assay to SCTAssay
Warning: Layer counts isn't present in the assay object; returning NULL
Warning: Different cells and/or features from existing assay SCT

Integration visualization-rpca

DefaultAssay(alldata.int) <- "integrated"

#Run Dimensionality reduction on integrated space
alldata.int <- RunPCA(alldata.int, features = hvgs_all, npcs = 30, do.print = TRUE, pcs.print = 1:5, genes.print = 15, verbose = FALSE)
alldata.int <- RunUMAP(alldata.int,  dims = 1:30, verbose = FALSE)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
# by assay
P1 <- DimPlot(alldata.int, reduction = "umap", group.by = "cell_line")+ ggtitle("UMAP seurat_integrated by assay")
P1

# alldata.int <- FindNeighbors(alldata.int, reduction = "pca_rpca", dims = 1:30, verbose = FALSE)
# alldata.int <- FindClusters(alldata.int, resolution = 0.5, verbose = FALSE)

# by celltype
P2 <- DimPlot(alldata.int, reduction = "umap", group.by = "cell_line")+ ggtitle("UMAP seurat_integrated by cell line")
P2

# by celltype
P3 <- DimPlot(alldata.int, reduction = "umap", group.by = "predicted.celltype.l2", label = T, label.box = T)+ ggtitle("UMAP seurat_integrated by cell line")
P3

LS0tCnRpdGxlOiAiU2V1cmF0IEludGVncmF0aW9uIG9mIFBCTUMxMHgtSFBDLXJwY2EiCmF1dGhvcjogTmFzaXIgTWFobW9vZCBBYmJhc2kKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiAgI3JtZGZvcm1hdHM6OnJlYWR0aGVkb3duCiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICB0b2NfY29sbGFwc2VkOiB0cnVlCi0tLQoKIyAxLiBsb2FkIGxpYnJhcmllcwpgYGB7ciBzZXR1cH0KCmxpYnJhcnkoU2V1cmF0KQpsaWJyYXJ5KFNldXJhdE9iamVjdCkKbGlicmFyeShTZXVyYXREYXRhKQpsaWJyYXJ5KHBhdGNod29yaykKbGlicmFyeShBemltdXRoKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHJtYXJrZG93bikKbGlicmFyeSh0aW55dGV4KQoKCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoZGl0dG9TZXEpCmxpYnJhcnkoZ2dyZXBlbCkKI2xpYnJhcnkoZ2d0cmVlKQpsaWJyYXJ5KHBhcmFsbGVsKQpsaWJyYXJ5KHBsb3RseSkgICMgM0QgcGxvdApsaWJyYXJ5KFNldXJhdCkgICMgSWRlbnRzKCkKbGlicmFyeShTZXVyYXREaXNrKSAgIyBTYXZlSDVTZXVyYXQoKQpsaWJyYXJ5KHRpYmJsZSkgICMgcm93bm5hbWVzX3RvX2NvbHVtbgpsaWJyYXJ5KGhhcm1vbnkpICMgUnVuSGFybW9ueSgpCgojb3B0aW9ucyhtYy5jb3JlcyA9IGRldGVjdENvcmVzKCkgLSAxKQoKCgpgYGAKCgojIDIuIExvYWQgU2V1cmF0IE9iamVjdCAKYGBge3IgbG9hZF9zZXVyYXR9CgojTG9hZCBTZXVyYXQgT2JqZWN0IG1lcmdlZCBmcm9tIGNlbGwgbGluZXMgYW5kIGEgY29udHJvbChQQk1DKSBhZnRlciBmaWx0cmF0aW9uClNTX0FsbF9zYW1wbGVzX01lcmdlZCA8LSBsb2FkKCIwLU9CSi9DRDRUY2VsbHNfbm9fQl9jZWxsc19pbl9MNF9TQ1RfUmVhZHlfZm9yX2ludGVncmF0aW9uLnJvYmoiKQoKQWxsX3NhbXBsZXNfTWVyZ2VkCmBgYAoKCgoKIyAzLiBEYXRhIFBSRVBBUkFUSU9OCmBgYHtyIGRhdGEsIGZpZy5oZWlnaHQ9OCwgZmlnLndpZHRoPTEyfQpvcHRpb25zKGZ1dHVyZS5nbG9iYWxzLm1heFNpemUgPSAxMDI0ICogMTAyNCAqIDEwMjQpICAjIDEgR0IKCiMgRGF0YSBQcmVwYXJhdGlvbiBmb3IgU2V1cmF0IHY1CmFsbGRhdGEgPC0gQWxsX3NhbXBsZXNfTWVyZ2VkCgoKCiMgU3BsaXQgdGhlIG9iamVjdCBieSAnb3JpZy5pZGVudCcgZm9yIGluZGl2aWR1YWwgZGF0YXNldCBwcm9jZXNzaW5nCmFsbGRhdGEubGlzdCA8LSBTcGxpdE9iamVjdChhbGxkYXRhLCBzcGxpdC5ieSA9ICJvcmlnLmlkZW50IikKCiMgTm9ybWFsaXplIGFuZCBpZGVudGlmeSB2YXJpYWJsZSBmZWF0dXJlcyBmb3IgZWFjaCBkYXRhc2V0IGluIHRoZSBsaXN0CmFsbGRhdGEubGlzdCA8LSBsYXBwbHkoWCA9IGFsbGRhdGEubGlzdCwgRlVOID0gZnVuY3Rpb24oeCkgewogICAgeCA8LSBTQ1RyYW5zZm9ybSh4LCB2ZXJib3NlID0gRikKICAgIH0pCgoKIyBTZWxlY3QgaW50ZWdyYXRpb24gZmVhdHVyZXMgYWNyb3NzIGRhdGFzZXRzCmh2Z3NfYWxsIDwtIFNlbGVjdEludGVncmF0aW9uRmVhdHVyZXMoYWxsZGF0YS5saXN0LCBuZmVhdHVyZXMgPSAzMDAwKQoKIyBTY2FsZSBhbmQgUENBIG9uIGVhY2ggZGF0YXNldCB1c2luZyBzZWxlY3RlZCBpbnRlZ3JhdGlvbiBmZWF0dXJlcwphbGxkYXRhLmxpc3QgPC0gbGFwcGx5KGFsbGRhdGEubGlzdCwgZnVuY3Rpb24oeCkgewogICAgeCA8LSBSdW5QQ0EoeCwgZmVhdHVyZXMgPSBodmdzX2FsbCwgdmVyYm9zZSA9IEZBTFNFKQp9KQoKCmBgYAoKCiMgNC4gcnBjYS1pbnRlZ3JhdGlvbgpgYGB7ciBpbnRlZ3JhdGlvbi1ycGNhMSwgZmlnLmhlaWdodD04LCBmaWcud2lkdGg9MTJ9CgphbGxkYXRhLmxpc3QgPC0gUHJlcFNDVEludGVncmF0aW9uKGFsbGRhdGEubGlzdCwgYW5jaG9yLmZlYXR1cmVzID0gaHZnc19hbGwpCgphbGxkYXRhLmFuY2hvcnMgPC0gRmluZEludGVncmF0aW9uQW5jaG9ycyhvYmplY3QubGlzdCA9IGFsbGRhdGEubGlzdCwgYW5jaG9yLmZlYXR1cmVzID0gaHZnc19hbGwsIHJlZHVjdGlvbiA9ICAicnBjYSIsIG5vcm1hbGl6YXRpb24ubWV0aG9kID0gIlNDVCIpCgphbGxkYXRhLmludCA8LSBJbnRlZ3JhdGVEYXRhKGFuY2hvcnNldCA9IGFsbGRhdGEuYW5jaG9ycywgbm9ybWFsaXphdGlvbi5tZXRob2QgPSAiU0NUIikKCgoKYGBgCgojIyBJbnRlZ3JhdGlvbiB2aXN1YWxpemF0aW9uLXJwY2EKYGBge3IgaW50ZWdyYXRpb24tdmlzdWFsaXphdGlvbjEsIGZpZy5oZWlnaHQ9OCwgZmlnLndpZHRoPTEyfQoKRGVmYXVsdEFzc2F5KGFsbGRhdGEuaW50KSA8LSAiaW50ZWdyYXRlZCIKCiNSdW4gRGltZW5zaW9uYWxpdHkgcmVkdWN0aW9uIG9uIGludGVncmF0ZWQgc3BhY2UKYWxsZGF0YS5pbnQgPC0gUnVuUENBKGFsbGRhdGEuaW50LCBmZWF0dXJlcyA9IGh2Z3NfYWxsLCBucGNzID0gMzAsIGRvLnByaW50ID0gVFJVRSwgcGNzLnByaW50ID0gMTo1LCBnZW5lcy5wcmludCA9IDE1LCB2ZXJib3NlID0gRkFMU0UpCmFsbGRhdGEuaW50IDwtIFJ1blVNQVAoYWxsZGF0YS5pbnQsICBkaW1zID0gMTozMCwgdmVyYm9zZSA9IEZBTFNFKQoKIyBieSBhc3NheQpQMSA8LSBEaW1QbG90KGFsbGRhdGEuaW50LCByZWR1Y3Rpb24gPSAidW1hcCIsIGdyb3VwLmJ5ID0gImNlbGxfbGluZSIpKyBnZ3RpdGxlKCJVTUFQIHNldXJhdF9pbnRlZ3JhdGVkIGJ5IGFzc2F5IikKUDEKCgojIGFsbGRhdGEuaW50IDwtIEZpbmROZWlnaGJvcnMoYWxsZGF0YS5pbnQsIHJlZHVjdGlvbiA9ICJwY2FfcnBjYSIsIGRpbXMgPSAxOjMwLCB2ZXJib3NlID0gRkFMU0UpCiMgYWxsZGF0YS5pbnQgPC0gRmluZENsdXN0ZXJzKGFsbGRhdGEuaW50LCByZXNvbHV0aW9uID0gMC41LCB2ZXJib3NlID0gRkFMU0UpCgojIGJ5IGNlbGx0eXBlClAyIDwtIERpbVBsb3QoYWxsZGF0YS5pbnQsIHJlZHVjdGlvbiA9ICJ1bWFwIiwgZ3JvdXAuYnkgPSAiY2VsbF9saW5lIikrIGdndGl0bGUoIlVNQVAgc2V1cmF0X2ludGVncmF0ZWQgYnkgY2VsbCBsaW5lIikKUDIKCiMgYnkgY2VsbHR5cGUKUDMgPC0gRGltUGxvdChhbGxkYXRhLmludCwgcmVkdWN0aW9uID0gInVtYXAiLCBncm91cC5ieSA9ICJwcmVkaWN0ZWQuY2VsbHR5cGUubDIiLCBsYWJlbCA9IFQsIGxhYmVsLmJveCA9IFQpKyBnZ3RpdGxlKCJVTUFQIHNldXJhdF9pbnRlZ3JhdGVkIGJ5IGNlbGwgbGluZSIpClAzCgpgYGAKCg==