1. Load libraries

2. Load Seurat Object


# Load the merged Seurat object (cell lines plus a PBMC control, after
# filtering).
# NOTE: load() restores objects under the names they were saved with and
# returns those names as a character vector. SS_All_samples_Merged therefore
# holds object NAMES, not the Seurat object. The object itself is expected to
# appear in the workspace as `All_samples_Merged`, which the rest of the
# script uses.
SS_All_samples_Merged <- load(
  "0-OBJ/CD4Tcells_no_B_cells_in_L4_SCT_Ready_for_integration.robj"
)

# Fail early with a clear message if the file did not contain that object.
if (!"All_samples_Merged" %in% SS_All_samples_Merged) {
  stop(
    "Expected object 'All_samples_Merged' in the .robj file; found: ",
    paste(SS_All_samples_Merged, collapse = ", "),
    call. = FALSE
  )
}

All_samples_Merged
An object of class Seurat 
62931 features across 49388 samples within 6 assays 
Active assay: SCT (26179 features, 3000 variable features)
 3 layers present: counts, data, scale.data
 5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
 4 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap

3. Data preparation

# Allow up to 1 GiB of globals to be exported to future workers.
options(future.globals.maxSize = 1024^3)

# Data preparation for Seurat v5: `alldata` is the working name used for the
# merged object from here on.
alldata <- All_samples_Merged

# Break the merged object into one Seurat object per 'orig.ident' value so
# that each dataset can be processed on its own.
alldata.list <- SplitObject(object = alldata, split.by = "orig.ident")

# Normalize and identify variable features for each dataset in the list by
# running SCTransform independently on every element.
# NOTE(review): the loaded object already carries an SCT assay, so each call
# replaces it. This is presumably the source of the "Different cells and/or
# features from existing assay SCT" warnings in the captured output below.
alldata.list <- lapply(X = alldata.list, FUN = function(x) {
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  SCTransform(x, verbose = FALSE)
})
Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. 
Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : Different cells and/or features from existing assay SCTAvis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. 
Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : Different cells and/or features from existing assay SCTAvis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. 
Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : Different cells and/or features from existing assay SCTAvis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. 
Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : Different cells and/or features from existing assay SCTAvis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. 
Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : Different cells and/or features from existing assay SCTAvis : useNames = NA is deprecated. 
Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. 
Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : Different cells and/or features from existing assay SCTAvis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. 
Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : Different cells and/or features from existing assay SCTAvis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. 
Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : Different cells and/or features from existing assay SCTAvis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. 
Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : useNames = NA is deprecated. Instead, specify either useNames = TRUE or useNames = FALSE.Avis : Different cells and/or features from existing assay SCT
# Choose the 3000 features shared across datasets that will drive integration.
hvgs_all <- SelectIntegrationFeatures(
  object.list = alldata.list,
  nfeatures = 3000
)

4. rpca-integration

# Make sure every dataset holds SCT residuals (scale.data) for ALL selected
# integration features. This has to happen BEFORE the per-dataset PCA:
# otherwise RunPCA can only use the features already present in each
# dataset's own scale.data, and the PCAs are not built on the shared set.
alldata.list <- PrepSCTIntegration(
  object.list = alldata.list,
  anchor.features = hvgs_all
)

# Per-dataset PCA on the shared integration features. These reductions are
# what FindIntegrationAnchors(reduction = "rpca") works from.
alldata.list <- lapply(alldata.list, function(x) {
  RunPCA(x, features = hvgs_all, verbose = FALSE)
})

  |                                                  | 0 % ~calculating  
  |++++++                                            | 11% ~21s          
  |++++++++++++                                      | 22% ~19s          
  |+++++++++++++++++                                 | 33% ~16s          
  |+++++++++++++++++++++++                           | 44% ~14s          
  |++++++++++++++++++++++++++++                      | 56% ~11s          
  |++++++++++++++++++++++++++++++++++                | 67% ~08s          
  |+++++++++++++++++++++++++++++++++++++++           | 78% ~05s          
  |+++++++++++++++++++++++++++++++++++++++++++++     | 89% ~03s          
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=23s  
# Find integration anchors between all pairs of datasets, using reciprocal PCA
# on the SCT-normalized data and the shared integration features.
alldata.anchors <- FindIntegrationAnchors(
  object.list = alldata.list,
  anchor.features = hvgs_all,
  normalization.method = "SCT",
  reduction = "rpca"
)
Computing within dataset neighborhoods

  |                                                  | 0 % ~calculating  
  |++++++                                            | 11% ~15s          
  |++++++++++++                                      | 22% ~14s          
  |+++++++++++++++++                                 | 33% ~12s          
  |+++++++++++++++++++++++                           | 44% ~10s          
  |++++++++++++++++++++++++++++                      | 56% ~08s          
  |++++++++++++++++++++++++++++++++++                | 67% ~06s          
  |+++++++++++++++++++++++++++++++++++++++           | 78% ~04s          
  |+++++++++++++++++++++++++++++++++++++++++++++     | 89% ~02s          
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=16s  
Finding all pairwise anchors

  |                                                  | 0 % ~calculating  
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1789 anchors

  |++                                                | 3 % ~05m 35s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1153 anchors

  |+++                                               | 6 % ~05m 27s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1908 anchors

  |+++++                                             | 8 % ~05m 24s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 859 anchors

  |++++++                                            | 11% ~05m 16s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1441 anchors

  |+++++++                                           | 14% ~05m 06s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2444 anchors

  |+++++++++                                         | 17% ~04m 56s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1083 anchors

  |++++++++++                                        | 19% ~04m 45s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1830 anchors

  |++++++++++++                                      | 22% ~04m 39s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2432 anchors

  |+++++++++++++                                     | 25% ~04m 29s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2937 anchors

  |++++++++++++++                                    | 28% ~04m 19s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1187 anchors

  |++++++++++++++++                                  | 31% ~04m 07s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1629 anchors

  |+++++++++++++++++                                 | 33% ~03m 56s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2564 anchors

  |+++++++++++++++++++                               | 36% ~03m 46s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1762 anchors

  |++++++++++++++++++++                              | 39% ~03m 36s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2614 anchors

  |+++++++++++++++++++++                             | 42% ~03m 26s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 988 anchors

  |+++++++++++++++++++++++                           | 44% ~03m 15s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1736 anchors

  |++++++++++++++++++++++++                          | 47% ~03m 06s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2066 anchors

  |+++++++++++++++++++++++++                         | 50% ~02m 56s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2784 anchors

  |+++++++++++++++++++++++++++                       | 53% ~02m 46s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 3732 anchors

  |++++++++++++++++++++++++++++                      | 56% ~02m 36s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 2935 anchors

  |++++++++++++++++++++++++++++++                    | 58% ~02m 26s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 655 anchors

  |+++++++++++++++++++++++++++++++                   | 61% ~02m 15s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 662 anchors

  |++++++++++++++++++++++++++++++++                  | 64% ~02m 05s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 637 anchors

  |++++++++++++++++++++++++++++++++++                | 67% ~01m 55s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 567 anchors

  |+++++++++++++++++++++++++++++++++++               | 69% ~01m 46s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 603 anchors

  |+++++++++++++++++++++++++++++++++++++             | 72% ~01m 36s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 774 anchors

  |++++++++++++++++++++++++++++++++++++++            | 75% ~01m 26s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 650 anchors

  |+++++++++++++++++++++++++++++++++++++++           | 78% ~01m 16s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 519 anchors

  |+++++++++++++++++++++++++++++++++++++++++         | 81% ~01m 06s      
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 432 anchors

  |++++++++++++++++++++++++++++++++++++++++++        | 83% ~56s          
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 402 anchors

  |++++++++++++++++++++++++++++++++++++++++++++      | 86% ~47s          
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 337 anchors

  |+++++++++++++++++++++++++++++++++++++++++++++     | 89% ~37s          
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 416 anchors

  |++++++++++++++++++++++++++++++++++++++++++++++    | 92% ~28s          
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 468 anchors

  |++++++++++++++++++++++++++++++++++++++++++++++++  | 94% ~18s          
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 382 anchors

  |+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~09s          
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
    Found 1241 anchors

  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=05m 27s
# Integrate the datasets using the anchor set computed by
# FindIntegrationAnchors(); normalization.method is "SCT" here as it was there.
alldata.int <- IntegrateData(anchorset = alldata.anchors, normalization.method = "SCT")
[1] 1
Avis : Different cells and/or features from existing assay SCTAvis : Layer counts isn't present in the assay object; returning NULL
[1] 2
Avis : Different cells and/or features from existing assay SCTAvis : Layer counts isn't present in the assay object; returning NULL
[1] 3
Avis : Different cells and/or features from existing assay SCTAvis : Layer counts isn't present in the assay object; returning NULL
[1] 4
Avis : Different cells and/or features from existing assay SCTAvis : Layer counts isn't present in the assay object; returning NULL
[1] 5
Avis : Different cells and/or features from existing assay SCTAvis : Layer counts isn't present in the assay object; returning NULL
[1] 6
Avis : Different cells and/or features from existing assay SCTAvis : Layer counts isn't present in the assay object; returning NULL
[1] 7
Avis : Different cells and/or features from existing assay SCTAvis : Layer counts isn't present in the assay object; returning NULL
[1] 8
Avis : Different cells and/or features from existing assay SCTAvis : Layer counts isn't present in the assay object; returning NULL
[1] 9
Avis : Different cells and/or features from existing assay SCTAvis : Layer counts isn't present in the assay object; returning NULLMerging dataset 7 into 5
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Avis : Layer counts isn't present in the assay object; returning NULLMerging dataset 6 into 5 7
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Avis : Layer counts isn't present in the assay object; returning NULLMerging dataset 4 into 3
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Avis : Layer counts isn't present in the assay object; returning NULLMerging dataset 9 into 8
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Avis : Layer counts isn't present in the assay object; returning NULLMerging dataset 3 4 into 5 7 6
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Avis : Layer counts isn't present in the assay object; returning NULLMerging dataset 1 into 2
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Avis : Layer counts isn't present in the assay object; returning NULLMerging dataset 2 1 into 5 7 6 3 4
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Avis : Layer counts isn't present in the assay object; returning NULLMerging dataset 8 9 into 5 7 6 3 4 2 1
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Avis : Layer counts isn't present in the assay object; returning NULLAvis : sparse->dense coercion: allocating vector of size 1.1 GiBAvis : Assay integrated changing from Assay to SCTAssayAvis : Layer counts isn't present in the assay object; returning NULLAvis : Different cells and/or features from existing assay SCT

Integration visualization-rpca


DefaultAssay(alldata.int) <- "integrated"

#Run Dimensionality reduction on integrated space
alldata.int <- RunPCA(alldata.int, features = hvgs_all, npcs = 50, do.print = TRUE, pcs.print = 1:5, genes.print = 15, verbose = FALSE)

# TEST-1
# get significant PCs
stdv <- All_samples_Merged[["pca"]]@stdev
sum.stdv <- sum(All_samples_Merged[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co2 <- sort(which((percent.stdv[1:length(percent.stdv) - 1] - 
                       percent.stdv[2:length(percent.stdv)]) > 0.1), 
              decreasing = T)[1] + 1
min.pc <- min(co1, co2)
min.pc
[1] 16
alldata.int <- RunUMAP(alldata.int,  dims = 1:16, verbose = FALSE)

# by cell line
P1 <- DimPlot(alldata.int, reduction = "umap", group.by = "cell_line")+ ggtitle("UMAP seurat_integrated_rpca by cell line")
P1


DimPlot(alldata.int, reduction = "umap", group.by = "cell_line", label = T, label.box = T)+ ggtitle("UMAP seurat_integrated_rpca by cell line")


alldata.int <- FindNeighbors(alldata.int, reduction = "pca", dims = 1:16, verbose = FALSE)
alldata.int <- FindClusters(alldata.int, resolution = 0.5, verbose = FALSE)

# by celltype
P2 <- DimPlot(alldata.int, reduction = "umap", group.by = "cell_line")+ ggtitle("UMAP seurat_integrated_rpca by cell line")
P2


DimPlot(alldata.int, reduction = "umap", group.by = "cell_line", label = T, label.box = T)+ ggtitle("UMAP seurat_integrated_rpca by cell line")


# by celltype
P3 <- DimPlot(alldata.int, reduction = "umap", group.by = "predicted.celltype.l2", label = T, label.box = T)+ ggtitle("UMAP seurat_integrated_ by Annotation")
P3


DimPlot(alldata.int, reduction = "umap", group.by = "predicted.celltype.l2", label = T, label.box = T)+ ggtitle("UMAP seurat_integrated_ by Annotation")


# Visualize UMAP with clusters
DimPlot(alldata.int, reduction = "umap", group.by = "seurat_clusters", label = TRUE, label.box = TRUE) +
  ggtitle("UMAP with Clusters (Resolution 0.5)")

Marker Gene Visualization


DefaultAssay(alldata.int) <- "SCT"


# Set marker genes specific to requested immune cell types
myfeatures1 <- c("CD19", "CD79A", "MS4A1", # B cells
                "CD14", "LYZ", "FCGR3A", # Monocytes
                "CSF1R", "CD68", # Macrophages
                "NKG7", "GNLY", "KIR3DL1", # NK cells
                "MKI67", # Proliferating NK cells
                "CD34", "KIT", # HSPCs
                "CD3E", "CCR7", # T cells
                "SELL", "CD45RO", # Tnaive, Tcm
                "CD44", "CD45RA") # Tem, Temra

cd4_feature_plot1 <- FeaturePlot(
  alldata.int, 
  features = myfeatures1, 
  reduction = "umap", 
  ncol = 4
) + 
  ggtitle("CD4 T Cell Marker Expression - Harmony Integration") +
  NoLegend()
Avis : Could not find CD34 in the default search locations, found in 'RNA' assay insteadAvis : Could not find CD45RO in the default search locations, found in 'ADT' assay insteadAvis : Could not find CD45RA in the default search locations, found in 'ADT' assay instead
# Display the plot
print(cd4_feature_plot1)


# Define markers specific to CD4 T cells and their subsets
cd4_markers <- c(
  "CD4",          # General CD4 T cells
  "IL7R",         # Naive T cells
  "CCR7",         # T central memory (Tcm) cells
  "SELL",         # T naive cells
  "FOXP3",        # Regulatory T cells (Tregs)
  "IL2RA",        # Activated T cells
  "PDCD1",        # Exhausted T cells
  "LAG3",         # Exhausted T cells
  "TIGIT",        # Exhausted T cells
  "GATA3",        # Th2 cells
  "TBX21",        # Th1 cells
  "RORC",         # Th17 cells
  "BCL6"          # T follicular helper (Tfh) cells
)

# Visualize marker genes for CD4 T cells
cd4_feature_plot2 <- FeaturePlot(
  alldata.int, 
  features = cd4_markers, 
  reduction = "umap", 
  ncol = 4
) + 
  ggtitle("CD4 T Cell Marker Expression - Harmony Integration") +
  NoLegend()

# Display the plot
print(cd4_feature_plot2)

CD4 T Cell Marker Visualization

# Set marker genes specific to CD4 T cell biology and states
cd4_markers <- c(
    # Core T cell markers
    "CD3E",     # T cell marker
    "CD4",      # CD4 T cell marker
    
    # Naive/Memory markers
    "CCR7",     # Naive/Central memory
    "SELL",     # L-selectin, naive marker
    "CD27",     # Memory marker
    "IL7R",     # Naive/Memory marker
    
    # Activation/State markers
    "IL2RA",    # CD25, activation marker
    "CD69",     # Early activation
    "HLA-DRA",  # Activation marker
    
    # Exhaustion markers
    "PDCD1",    # PD-1
    "LAG3",     # Exhaustion marker
    "TIGIT",    # Exhaustion marker
    
    # Regulatory T cell markers
    "FOXP3",    # Treg marker
    "IL2RA",    # CD25, Treg marker
    "CTLA4",    # Treg/exhaustion marker
    
    # Effector/Function markers
    "IL2",      # T cell function
    "IFNG",     # Th1
    "IL4",      # Th2
    "IL13",     # Th2
    "IL17A"     # Th17
)

# Create feature plots with better visualization
FeaturePlot(alldata.int, 
            features = cd4_markers, 
            reduction = "umap", 
            ncol = 4,
            pt.size = 0.1,           # Smaller point size for better resolution
            min.cutoff = "q1",       # Remove bottom 1% of expression
            max.cutoff = "q99",      # Remove top 1% of expression
            order = TRUE) +          # Plot highest expressing cells on top
    ggtitle("CD4 T Cell Marker Expression - Harmony Integration") +
    theme(plot.title = element_text(size = 16, face = "bold")) +
    NoLegend()
Avis : Found the following features in more than one assay, excluding the default. We will not include these in the final data frame: PDCD1, LAG3, FOXP3, CTLA4, IL2, IFNG, IL4, IL13, IL17AAvis : The following requested variables were not found: PDCD1, LAG3, FOXP3, CTLA4, IL2, IFNG, IL4, IL13, IL17A

# Optional: Add violin plots to see expression distribution across clusters
VlnPlot(alldata.int, 
        features = cd4_markers[1:20], # First 8 markers
        stack = TRUE,
        flip = TRUE) +
        ggtitle("CD4 T Cell Marker Distribution Across Clusters")
Avis : Found the following features in more than one assay, excluding the default. We will not include these in the final data frame: PDCD1, LAG3, FOXP3, CTLA4, IL2, IFNG, IL4, IL13, IL17AAvis : The following requested variables were not found: PDCD1, LAG3, FOXP3, CTLA4, IL2, IFNG, IL4, IL13, IL17A

---
title: "Seurat Integration of PBMC10x-Rserver-rpca-part1"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  #rmdformats::readthedown
  html_notebook:
    toc: true
    toc_float: true
    toc_collapsed: true
---

# 1. Load libraries
```{r setup, include=FALSE}

library(Seurat)
library(SeuratObject)
library(SeuratData)
library(patchwork)
library(Azimuth)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(rmarkdown)
library(tinytex)


library(dplyr)
library(dittoSeq)
library(ggrepel)
#library(ggtree)
library(parallel)
library(plotly)  # 3D plot
library(Seurat)  # Idents()
library(SeuratDisk)  # SaveH5Seurat()
library(tibble)  # rownames_to_column()
library(harmony) # RunHarmony()
#options(mc.cores = detectCores() - 1)



```


# 2. Load Seurat Object 
```{r load_seurat}

# Restore the filtered, merged Seurat object (cell lines plus the PBMC
# control) from disk. load() restores objects under their saved names; the
# file is expected to provide `All_samples_Merged`, which is used below.
load(file = "0-OBJ/CD4Tcells_no_B_cells_in_L4_SCT_Ready_for_integration.robj")

# Show the object summary (assays, layers, reductions)
print(All_samples_Merged)
```




# 3. Data preparation
```{r data, fig.height=8, fig.width=12}
# Raise the size limit for globals exported by the `future` framework to 1 GiB
options(future.globals.maxSize = 1024 * 1024 * 1024)

# Work under a shorter name; `All_samples_Merged` itself is left untouched
alldata <- All_samples_Merged

# One Seurat object per sample, keyed by the `orig.ident` metadata column
alldata.list <- SplitObject(alldata, split.by = "orig.ident")

# Run SCTransform on every per-sample object independently
alldata.list <- lapply(alldata.list, SCTransform, verbose = FALSE)

# Pick the 3000 features used for anchor finding and the integrated PCA
hvgs_all <- SelectIntegrationFeatures(alldata.list, nfeatures = 3000)

# Per-sample PCA restricted to the selected integration features.
# NOTE(review): the Seurat SCT + RPCA workflow computes this PCA after
# PrepSCTIntegration(); confirm the ordering against the integration step.
alldata.list <- lapply(
  alldata.list,
  RunPCA,
  features = hvgs_all,
  verbose = FALSE
)


```


# 4. rpca-integration
```{r integration-rpca1, fig.height=8, fig.width=12}

# Prepare every per-sample SCT assay for integration on the shared
# integration features
alldata.list <- PrepSCTIntegration(alldata.list, anchor.features = hvgs_all)

# RPCA anchors are searched in each sample's PCA space. The documented
# SCT + RPCA workflow computes that PCA after PrepSCTIntegration(), so it is
# (re)computed here on the prepared objects rather than relying on a PCA that
# was run before the preparation step.
alldata.list <- lapply(
  alldata.list,
  RunPCA,
  features = hvgs_all,
  verbose = FALSE
)

# Find pairwise anchors between samples using reciprocal PCA
alldata.anchors <- FindIntegrationAnchors(
  object.list = alldata.list,
  anchor.features = hvgs_all,
  reduction = "rpca",
  normalization.method = "SCT"
)

# Integrate the samples along the anchors
alldata.int <- IntegrateData(
  anchorset = alldata.anchors,
  normalization.method = "SCT"
)



```

## Integration visualization-rpca
```{r integration-visualization1, fig.height=8, fig.width=12}

DefaultAssay(alldata.int) <- "integrated"

# PCA on the integrated assay. The former `do.print`, `pcs.print` and
# `genes.print` arguments are Seurat v2 names that RunPCA() no longer defines
# (and nothing is printed with verbose = FALSE), so they are dropped.
alldata.int <- RunPCA(
  alldata.int,
  features = hvgs_all,
  npcs = 50,
  verbose = FALSE
)

# Pick the number of PCs from the PCA that was just computed on the
# integrated data. Previously the standard deviations were read from
# `All_samples_Merged`, i.e. from the PCA stored before integration.
stdv <- alldata.int[["pca"]]@stdev
sum.stdv <- sum(stdv)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)

# Criterion 1: first PC at which the cumulative share exceeds 90% while the
# PC's own share is below 5%
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]

# Criterion 2: one past the last PC whose share drops by more than 0.1
# percentage points relative to the following PC. diff() replaces the former
# `x[1:length(x) - 1]` indexing, which only worked because index 0 is dropped.
pc_drop <- -diff(percent.stdv)
co2 <- tail(which(pc_drop > 0.1), 1) + 1

# Take the smaller cut-off; a criterion that matched nothing is ignored, and
# all computed PCs are used if neither matched
pc_cutoffs <- c(co1, co2)
pc_cutoffs <- pc_cutoffs[!is.na(pc_cutoffs)]
min.pc <- if (length(pc_cutoffs) > 0) min(pc_cutoffs) else length(stdv)
min.pc

# Use the computed cut-off for every downstream step instead of a number
# copied by hand from the printed output
pcs_used <- seq_len(min.pc)

alldata.int <- RunUMAP(alldata.int, dims = pcs_used, verbose = FALSE)

# by cell line
P1 <- DimPlot(alldata.int, reduction = "umap", group.by = "cell_line") +
  ggtitle("UMAP seurat_integrated_rpca by cell line")
P1

DimPlot(alldata.int, reduction = "umap", group.by = "cell_line",
        label = TRUE, label.box = TRUE) +
  ggtitle("UMAP seurat_integrated_rpca by cell line")

# Graph-based clustering on the same PCs as the UMAP
alldata.int <- FindNeighbors(alldata.int, reduction = "pca", dims = pcs_used,
                             verbose = FALSE)
alldata.int <- FindClusters(alldata.int, resolution = 0.5, verbose = FALSE)

# by cell line again, after clustering (same grouping as P1)
P2 <- DimPlot(alldata.int, reduction = "umap", group.by = "cell_line") +
  ggtitle("UMAP seurat_integrated_rpca by cell line")
P2

DimPlot(alldata.int, reduction = "umap", group.by = "cell_line",
        label = TRUE, label.box = TRUE) +
  ggtitle("UMAP seurat_integrated_rpca by cell line")

# by predicted cell type (metadata column `predicted.celltype.l2`)
P3 <- DimPlot(alldata.int, reduction = "umap",
              group.by = "predicted.celltype.l2",
              label = TRUE, label.box = TRUE) +
  ggtitle("UMAP seurat_integrated_ by Annotation")
P3

DimPlot(alldata.int, reduction = "umap", group.by = "predicted.celltype.l2",
        label = TRUE, label.box = TRUE) +
  ggtitle("UMAP seurat_integrated_ by Annotation")

# Visualize UMAP with clusters
DimPlot(alldata.int, reduction = "umap", group.by = "seurat_clusters",
        label = TRUE, label.box = TRUE) +
  ggtitle("UMAP with Clusters (Resolution 0.5)")

```



##  Marker Gene Visualization
```{r featureplot-rpca1, fig.height=14, fig.width=18}

# Plot expression from the SCT assay rather than the integrated assay
DefaultAssay(alldata.int) <- "SCT"

# Marker genes for the major immune lineages.
# NOTE(review): "CD45RO" and "CD45RA" are protein isoform names rather than
# gene symbols; they are expected to resolve from the ADT assay - confirm.
myfeatures1 <- c("CD19", "CD79A", "MS4A1", # B cells
                 "CD14", "LYZ", "FCGR3A", # Monocytes
                 "CSF1R", "CD68", # Macrophages
                 "NKG7", "GNLY", "KIR3DL1", # NK cells
                 "MKI67", # Proliferating NK cells
                 "CD34", "KIT", # HSPCs
                 "CD3E", "CCR7", # T cells
                 "SELL", "CD45RO", # Tnaive, Tcm
                 "CD44", "CD45RA") # Tem, Temra

# FeaturePlot() returns a patchwork of panels when several features are
# requested. `+` only modifies the last panel (and `+ ggtitle()` overwrote its
# gene label), so `&` is used to apply NoLegend() to every panel and
# plot_annotation() to add one overall title.
cd4_feature_plot1 <- FeaturePlot(
  alldata.int,
  features = myfeatures1,
  reduction = "umap",
  ncol = 4
) &
  NoLegend()
cd4_feature_plot1 <- cd4_feature_plot1 +
  patchwork::plot_annotation(
    title = "Immune Lineage Marker Expression - RPCA Integration"
  )

# Display the plot
print(cd4_feature_plot1)

# Define markers specific to CD4 T cells and their subsets
cd4_markers <- c(
  "CD4",          # General CD4 T cells
  "IL7R",         # Naive T cells
  "CCR7",         # T central memory (Tcm) cells
  "SELL",         # T naive cells
  "FOXP3",        # Regulatory T cells (Tregs)
  "IL2RA",        # Activated T cells
  "PDCD1",        # Exhausted T cells
  "LAG3",         # Exhausted T cells
  "TIGIT",        # Exhausted T cells
  "GATA3",        # Th2 cells
  "TBX21",        # Th1 cells
  "RORC",         # Th17 cells
  "BCL6"          # T follicular helper (Tfh) cells
)

# Visualize marker genes for CD4 T cells (same patchwork handling as above)
cd4_feature_plot2 <- FeaturePlot(
  alldata.int,
  features = cd4_markers,
  reduction = "umap",
  ncol = 4
) &
  NoLegend()
cd4_feature_plot2 <- cd4_feature_plot2 +
  patchwork::plot_annotation(
    title = "CD4 T Cell Marker Expression - RPCA Integration"
  )

# Display the plot
print(cd4_feature_plot2)
```

##  CD4 T Cell Marker Visualization
```{r featureplot-rpca2, fig.height=12, fig.width=16}
# Marker genes covering CD4 T cell identity and functional states.
# Each gene is listed once: "IL2RA" (CD25) used to appear under both the
# activation and the Treg group, which requested the same feature twice.
# NOTE(review): a previous run warned that PDCD1, LAG3, FOXP3, CTLA4, IL2,
# IFNG, IL4, IL13 and IL17A were skipped because they were found in more than
# one non-default assay; consider requesting them with an explicit assay key
# (e.g. "rna_PDCD1") - confirm which assay should be shown.
cd4_markers <- c(
  # Core T cell markers
  "CD3E",     # T cell marker
  "CD4",      # CD4 T cell marker

  # Naive/Memory markers
  "CCR7",     # Naive/Central memory
  "SELL",     # L-selectin, naive marker
  "CD27",     # Memory marker
  "IL7R",     # Naive/Memory marker

  # Activation/State markers
  "IL2RA",    # CD25, activation marker (also a Treg marker)
  "CD69",     # Early activation
  "HLA-DRA",  # Activation marker

  # Exhaustion markers
  "PDCD1",    # PD-1
  "LAG3",     # Exhaustion marker
  "TIGIT",    # Exhaustion marker

  # Regulatory T cell markers (IL2RA/CD25 is listed above)
  "FOXP3",    # Treg marker
  "CTLA4",    # Treg/exhaustion marker

  # Effector/Function markers
  "IL2",      # T cell function
  "IFNG",     # Th1
  "IL4",      # Th2
  "IL13",     # Th2
  "IL17A"     # Th17
)

# Feature plots. FeaturePlot() returns a patchwork of panels here, so `&` is
# used to drop the legend on every panel (with `+` only the last panel was
# changed and its gene label was overwritten by ggtitle()).
cd4_state_plot <- FeaturePlot(
  alldata.int,
  features = cd4_markers,
  reduction = "umap",
  ncol = 4,
  pt.size = 0.1,        # Smaller point size for better resolution
  min.cutoff = "q1",    # Clip the colour scale at the 1st percentile
  max.cutoff = "q99",   # Clip the colour scale at the 99th percentile
  order = TRUE          # Plot highest expressing cells on top
) &
  NoLegend()

# One overall, bold title for the whole panel grid
cd4_state_plot +
  patchwork::plot_annotation(
    title = "CD4 T Cell Marker Expression - RPCA Integration",
    theme = theme(plot.title = element_text(size = 16, face = "bold"))
  )

# Optional: stacked violin plots of all markers across clusters. The whole
# vector is passed; the former `cd4_markers[1:20]` selected every marker
# even though its comment said "First 8 markers".
VlnPlot(alldata.int,
        features = cd4_markers,
        stack = TRUE,
        flip = TRUE) +
  ggtitle("CD4 T Cell Marker Distribution Across Clusters")


```



