1. Load libraries

2. Load Data into Seurat



 # Read the merged object from its RDS file (path is relative to the
 # notebook's working directory).
 All_samples_Merged <- readRDS(
   file = "../0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_STCAT.rds"
 )

# Trigger a garbage collection and report memory usage now that the object
# is loaded.
gc()

## Rename columns orig.ident and Patient_origin

# Rename Patient_origin categories.
# factor() silently maps every value that is missing from `levels` to NA
# (this also happens when the chunk is re-run on already-renamed data), so
# each recoded column is validated before it is written back to the object.
patient_origin_renamed <- factor(All_samples_Merged$Patient_origin,
  levels = c("1", "2", "3", "PBMC", "PBMC_10x"),
  labels = c("P1", "P2", "P3", "CD4T_lab", "CD4T_10x")
)
if (any(is.na(patient_origin_renamed) &
  !is.na(All_samples_Merged$Patient_origin))) {
  stop(
    "Patient_origin contains values outside the expected levels; ",
    "refusing to overwrite them with NA.",
    call. = FALSE
  )
}
All_samples_Merged$Patient_origin <- patient_origin_renamed

# Rename and reorder orig.ident (same NA guard as above).
# NOTE(review): the last label is "CD4_10x" here but "CD4T_10x" for
# Patient_origin -- looks like a typo; left unchanged because saved objects
# and downstream files may already use this label. Confirm before changing.
orig_ident_renamed <- factor(All_samples_Merged$orig.ident,
  levels = c("L1", "L2", "L3", "L4", "L5", "L6", "L7", "PBMC", "PBMC10x"),
  labels = c("L1", "L2", "L3", "L4", "L5", "L6", "L7", "CD4T_lab", "CD4_10x")
)
if (any(is.na(orig_ident_renamed) &
  !is.na(All_samples_Merged$orig.ident))) {
  stop(
    "orig.ident contains values outside the expected levels; ",
    "refusing to overwrite them with NA.",
    call. = FALSE
  )
}
All_samples_Merged$orig.ident <- orig_ident_renamed

# For each grouping column, print the per-group cell counts followed by the
# order of its factor levels.
for (meta_col in c("Patient_origin", "orig.ident", "cell_line")) {
  print(table(All_samples_Merged@meta.data[[meta_col]]))
  print(levels(All_samples_Merged@meta.data[[meta_col]]))
}

Save the RDS after changes



# Persist the object with the renamed metadata columns to a new RDS file.
saveRDS(
  object = All_samples_Merged,
  file = "../0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_STCAT_and_renamed_FINAL.rds"
)

## Reload the renamed Seurat object



 # Read back the object that was saved after the metadata columns were renamed.
 All_samples_Merged <- readRDS(
   file = "../0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_STCAT_and_renamed_FINAL.rds"
 )

# Trigger a garbage collection and report memory usage after loading.
gc()
             used   (Mb) gc trigger    (Mb)   max used   (Mb)
Ncells    8325391  444.7   12521134   668.8   11475082  612.9
Vcells 1231309712 9394.2 1528401147 11660.8 1231313627 9394.2

## Check the renamed columns orig.ident and Patient_origin


# Inspect the reloaded object: per-group cell counts and level order for
# Patient_origin, orig.ident and cell_line (recorded output follows each call).
table(All_samples_Merged$Patient_origin)

      P1       P2       P3 CD4T_lab CD4T_10x 
   11760    12434    16501     5106     3504 
levels(All_samples_Merged$Patient_origin)
[1] "P1"       "P2"       "P3"       "CD4T_lab" "CD4T_10x"
table(All_samples_Merged$orig.ident)

      L1       L2       L3       L4       L5       L6       L7 CD4T_lab  CD4_10x 
    5825     5935     6428     6006     6022     5148     5331     5106     3504 
levels(All_samples_Merged$orig.ident)
[1] "L1"       "L2"       "L3"       "L4"       "L5"       "L6"       "L7"       "CD4T_lab" "CD4_10x" 
table(All_samples_Merged$cell_line)

           L1            L2            L3            L4            L5            L6            L7 CD4Tcells_lab 
         5825          5935          6428          6006          6022          5148          5331          5106 
CD4Tcells_10x 
         3504 
levels(All_samples_Merged$cell_line)
[1] "L1"            "L2"            "L3"            "L4"            "L5"            "L6"           
[7] "L7"            "CD4Tcells_lab" "CD4Tcells_10x"

3. Convert Seurat to AnnData (Scanpy’s .h5ad format)

library(SeuratDisk)
Registered S3 method overwritten by 'SeuratDisk':
  method            from  
  as.sparse.H5Group Seurat
# Ensure metadata columns are not factors: every factor column in the cell
# metadata is converted to character before the object is exported.
# vapply() is used instead of sapply() so the mask is always a logical
# vector, whatever the shape of the metadata.
factor_cols <- vapply(All_samples_Merged@meta.data, is.factor, logical(1))
All_samples_Merged@meta.data[factor_cols] <- lapply(
  All_samples_Merged@meta.data[factor_cols],
  as.character
)

# Set default assay to SCT (where normalized data is stored and all downstream analyses were done)
DefaultAssay(All_samples_Merged) <- "SCT"

# Update to latest Seurat object structure if needed
All_samples_Merged <- UpdateSeuratObject(All_samples_Merged)
Validating object structure
Updating object slots
Ensuring keys are in the proper structure
Updating matrix keys for DimReduc ‘integrated_dr’
Updating matrix keys for DimReduc ‘ref.umap’
Updating matrix keys for DimReduc ‘pca’
Updating matrix keys for DimReduc ‘umap’
Updating matrix keys for DimReduc ‘harmony’
Ensuring keys are in the proper structure
Ensuring feature names don't have underscores or pipes
Updating slots in RNA
Updating slots in ADT
Updating slots in prediction.score.celltype.l1
Updating slots in prediction.score.celltype.l2
Updating slots in prediction.score.celltype.l3
Updating slots in SCT
Updating slots in SCT_nn
Setting default assay of SCT_nn to SCT
Updating slots in SCT_snn
Setting default assay of SCT_snn to SCT
Updating slots in harmony_snn
Cannot find harmony in the object, setting default assay of harmony_snn to SCT
Updating slots in integrated_dr
Updating slots in ref.umap
Setting ref.umap DimReduc to global
Updating slots in pca
Updating slots in umap
Setting umap DimReduc to global
Updating slots in harmony
Setting assay used for SCTransform.RNA to RNA
Setting assay used for RunPCA.SCT to SCT
Setting assay used for FindNeighbors.SCT.pca to SCT
Setting assay used for RunUMAP.SCT.pca to SCT
Setting assay used for Seurat..ProjectDim.SCT.harmony to SCT
Setting assay used for FindNeighbors.SCT.harmony to SCT
No assay information could be found for FindClusters
Warning: Adding a command log without an assay associated with itSetting assay used for RunUMAP.SCT.harmony to SCT
Setting assay used for NormalizeData.ADT to ADT
Validating object structure for Assay5 ‘RNA’
Validating object structure for Assay ‘ADT’
Validating object structure for Assay ‘prediction.score.celltype.l1’
Validating object structure for Assay ‘prediction.score.celltype.l2’
Validating object structure for Assay ‘prediction.score.celltype.l3’
Validating object structure for SCTAssay ‘SCT’
Validating object structure for Graph ‘SCT_nn’
Validating object structure for Graph ‘SCT_snn’
Validating object structure for Graph ‘harmony_snn’
Validating object structure for DimReduc ‘integrated_dr’
Validating object structure for DimReduc ‘ref.umap’
Validating object structure for DimReduc ‘pca’
Validating object structure for DimReduc ‘umap’
Validating object structure for DimReduc ‘harmony’
Object representation is consistent with the most current Seurat version
# Write the object to an h5Seurat file in the working directory, replacing
# any existing file of the same name.
SaveH5Seurat(
  All_samples_Merged,
  filename = "All_samples_Merged_for_PAGA.h5seurat",
  overwrite = TRUE
)
Creating h5Seurat file for version 3.1.5.9900
Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.Adding counts for ADT
Adding data for ADT
No variable features found for ADT
No feature-level metadata found for ADT
Adding data for prediction.score.celltype.l1
No variable features found for prediction.score.celltype.l1
No feature-level metadata found for prediction.score.celltype.l1
Adding data for prediction.score.celltype.l2
No variable features found for prediction.score.celltype.l2
No feature-level metadata found for prediction.score.celltype.l2
Adding data for prediction.score.celltype.l3
No variable features found for prediction.score.celltype.l3
No feature-level metadata found for prediction.score.celltype.l3
Adding counts for SCT
Adding data for SCT
Adding scale.data for SCT
Adding variable features for SCT
No feature-level metadata found for SCT
Writing out SCTModel.list for SCT
Adding cell embeddings for integrated_dr
No loadings for integrated_dr
No projected loadings for integrated_dr
No standard deviations for integrated_dr
No JackStraw data for integrated_dr
Adding cell embeddings for ref.umap
No loadings for ref.umap
No projected loadings for ref.umap
No standard deviations for ref.umap
No JackStraw data for ref.umap
Adding cell embeddings for pca
Adding loadings for pca
No projected loadings for pca
Adding standard deviations for pca
No JackStraw data for pca
Adding cell embeddings for umap
No loadings for umap
No projected loadings for umap
No standard deviations for umap
No JackStraw data for umap
Adding cell embeddings for harmony
Adding loadings for harmony
Adding projected loadings for harmony
Adding standard deviations for harmony
No JackStraw data for harmony
# Turn the h5Seurat file into an h5ad file that Scanpy can read, replacing
# any existing file of the same name.
# NOTE(review): the recorded log for this call reports SCT scale.data being
# written as X and SCT data as raw -- confirm that is what the Scanpy side expects.
Convert(
  "All_samples_Merged_for_PAGA.h5seurat",
  dest = "All_samples_Merged_for_PAGA.h5ad",
  overwrite = TRUE
)
Validating h5Seurat file
Adding scale.data from SCT as X
Adding data from SCT as raw
Transfering meta.data to obs
Adding dimensional reduction information for harmony
Adding feature loadings for harmony
Adding dimensional reduction information for integrated_dr
Adding dimensional reduction information for pca
Adding feature loadings for pca
Adding dimensional reduction information for umap
Adding dimensional reduction information for ref.umap (global)
Adding harmony_snn as neighbors
LS0tCnRpdGxlOiAiVHJhamVjdG9yeSBpbmZlcmVuY2UgdXNpbmcgUEFHQSAoU2NhbnB5IFRvb2xraXQpLWg1ZEZvcm1hdC1TZXVyYXREaXNrIgphdXRob3I6IE5hc2lyIE1haG1vb2QgQWJiYXNpCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCIKb3V0cHV0OgogICNybWRmb3JtYXRzOjpyZWFkdGhlZG93bgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDogdHJ1ZQogICAgdG9jX2NvbGxhcHNlZDogdHJ1ZQotLS0KCiMgMS4gbG9hZCBsaWJyYXJpZXMKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CgpsaWJyYXJ5KFNldXJhdCkKbGlicmFyeShTZXVyYXRPYmplY3QpCmxpYnJhcnkoU2V1cmF0RGF0YSkKbGlicmFyeShTaW5nbGVDZWxsRXhwZXJpbWVudCkKbGlicmFyeShwYXRjaHdvcmspCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkocm1hcmtkb3duKQpsaWJyYXJ5KHRpbnl0ZXgpCmxpYnJhcnkoemVsbGtvbnZlcnRlcikKbGlicmFyeSh0aWJibGUpCgpgYGAKCgojIDIuIExvYWQgRGF0YSBpbnRvIFNldXJhdApgYGB7ciBsb2FkX3NldXJhdH0KCgogQWxsX3NhbXBsZXNfTWVyZ2VkIDwtIHJlYWRSRFMoIi4uLzAtU2V1cmF0X1JEU19PQkpFQ1RfRklOQUwvQWxsX3NhbXBsZXNfTWVyZ2VkX3dpdGhfU1RDQVQucmRzIikKCgpnYygpCgpgYGAKIyMuIHJlbmFtZSBjb2x1bW5zIG9yaWcuaWRlbnQgYW5kIFBhdGllbnRfb3JpZ2luCmBgYHtyfQojIFJlbmFtZSBQYXRpZW50X29yaWdpbiBjYXRlZ29yaWVzCkFsbF9zYW1wbGVzX01lcmdlZCRQYXRpZW50X29yaWdpbiA8LSBmYWN0b3IoQWxsX3NhbXBsZXNfTWVyZ2VkJFBhdGllbnRfb3JpZ2luLAogIGxldmVscyA9IGMoIjEiLCAiMiIsICIzIiwgIlBCTUMiLCAiUEJNQ18xMHgiKSwKICBsYWJlbHMgPSBjKCJQMSIsICJQMiIsICJQMyIsICJDRDRUX2xhYiIsICJDRDRUXzEweCIpCikKCiMgUmVuYW1lIGFuZCByZW9yZGVyIG9yaWcuaWRlbnQKQWxsX3NhbXBsZXNfTWVyZ2VkJG9yaWcuaWRlbnQgPC0gZmFjdG9yKEFsbF9zYW1wbGVzX01lcmdlZCRvcmlnLmlkZW50LAogIGxldmVscyA9IGMoIkwxIiwgIkwyIiwgIkwzIiwgIkw0IiwgIkw1IiwgIkw2IiwgIkw3IiwgIlBCTUMiLCAiUEJNQzEweCIpLAogIGxhYmVscyA9IGMoIkwxIiwgIkwyIiwgIkwzIiwgIkw0IiwgIkw1IiwgIkw2IiwgIkw3IiwgIkNENFRfbGFiIiwgIkNENF8xMHgiKQopCgp0YWJsZShBbGxfc2FtcGxlc19NZXJnZWQkUGF0aWVudF9vcmlnaW4pCmxldmVscyhBbGxfc2FtcGxlc19NZXJnZWQkUGF0aWVudF9vcmlnaW4pCgp0YWJsZShBbGxfc2FtcGxlc19NZXJnZWQkb3JpZy5pZGVudCkKbGV2ZWxzKEFsbF9zYW1wbGVzX01lcmdlZCRvcmlnLmlkZW50KQoKdGFibGUoQWxsX3NhbXBsZXNfTWVyZ2VkJGNlbGxfbGluZSkKbGV2ZWxzKEFsbF9zYW1wbGVzX01lcmdlZCRjZWxsX2xpbmUpCgpgYGAKIyMgU2F2ZSB0aGUgUkRTIGFmdGVyIGNoYW5nZXMK
YGBge3J9CgoKc2F2ZVJEUyhBbGxfc2FtcGxlc19NZXJnZWQsIGZpbGUgPSAiLi4vMC1TZXVyYXRfUkRTX09CSkVDVF9GSU5BTC9BbGxfc2FtcGxlc19NZXJnZWRfd2l0aF9TVENBVF9hbmRfcmVuYW1lZF9GSU5BTC5yZHMiKQoKCgpgYGAKCiMjLiBMb2FkIERhdGEgaW50byBTZXVyYXQKYGBge3J9CgoKIEFsbF9zYW1wbGVzX01lcmdlZCA8LSByZWFkUkRTKCIuLi8wLVNldXJhdF9SRFNfT0JKRUNUX0ZJTkFML0FsbF9zYW1wbGVzX01lcmdlZF93aXRoX1NUQ0FUX2FuZF9yZW5hbWVkX0ZJTkFMLnJkcyIpCgoKZ2MoKQoKYGBgCgojIy4gcmVuYW1lIGNvbHVtbnMgb3JpZy5pZGVudCBhbmQgUGF0aWVudF9vcmlnaW4KYGBge3J9Cgp0YWJsZShBbGxfc2FtcGxlc19NZXJnZWQkUGF0aWVudF9vcmlnaW4pCmxldmVscyhBbGxfc2FtcGxlc19NZXJnZWQkUGF0aWVudF9vcmlnaW4pCgp0YWJsZShBbGxfc2FtcGxlc19NZXJnZWQkb3JpZy5pZGVudCkKbGV2ZWxzKEFsbF9zYW1wbGVzX01lcmdlZCRvcmlnLmlkZW50KQoKdGFibGUoQWxsX3NhbXBsZXNfTWVyZ2VkJGNlbGxfbGluZSkKbGV2ZWxzKEFsbF9zYW1wbGVzX01lcmdlZCRjZWxsX2xpbmUpCgpgYGAKCgojIDMuIENvbnZlcnQgU2V1cmF0IHRvIEFubkRhdGEgKFNjYW5weeKAmXMgLmg1YWQgZm9ybWF0KQpgYGB7ciBBbm5EYXRhLCBmaWcuaGVpZ2h0PTYsIGZpZy53aWR0aD0xMH0KbGlicmFyeShTZXVyYXREaXNrKQoKIyBFbnN1cmUgbWV0YWRhdGEgY29sdW1ucyBhcmUgbm90IGZhY3RvcnMKaSA8LSBzYXBwbHkoQWxsX3NhbXBsZXNfTWVyZ2VkQG1ldGEuZGF0YSwgaXMuZmFjdG9yKQpBbGxfc2FtcGxlc19NZXJnZWRAbWV0YS5kYXRhW2ldIDwtIGxhcHBseShBbGxfc2FtcGxlc19NZXJnZWRAbWV0YS5kYXRhW2ldLCBhcy5jaGFyYWN0ZXIpCgojIFNldCBkZWZhdWx0IGFzc2F5IHRvIFNDVCAod2hlcmUgbm9ybWFsaXplZCBkYXRhIGlzIHN0b3JlZCBhbmQgYWxsIGRvd25zdHJlYW0gYW5hbHlzZXMgd2VyZSBkb25lKQpEZWZhdWx0QXNzYXkoQWxsX3NhbXBsZXNfTWVyZ2VkKSA8LSAiU0NUIgoKIyBVcGRhdGUgdG8gbGF0ZXN0IFNldXJhdCBvYmplY3Qgc3RydWN0dXJlIGlmIG5lZWRlZApBbGxfc2FtcGxlc19NZXJnZWQgPC0gVXBkYXRlU2V1cmF0T2JqZWN0KEFsbF9zYW1wbGVzX01lcmdlZCkKCiMgU2F2ZSBhcyBoNVNldXJhdCBmaWxlClNhdmVINVNldXJhdChBbGxfc2FtcGxlc19NZXJnZWQsIGZpbGVuYW1lID0gIkFsbF9zYW1wbGVzX01lcmdlZF9mb3JfUEFHQS5oNXNldXJhdCIsIG92ZXJ3cml0ZSA9IFRSVUUpCgojIENvbnZlcnQgaDVTZXVyYXQgdG8gaDVhZCAoU2NhbnB5LXJlYWRhYmxlKQpDb252ZXJ0KCJBbGxfc2FtcGxlc19NZXJnZWRfZm9yX1BBR0EuaDVzZXVyYXQiLCBkZXN0ID0gIkFsbF9zYW1wbGVzX01lcmdlZF9mb3JfUEFHQS5oNWFkIiwgb3ZlcndyaXRlID0gVFJVRSkKCmBgYAoKCgoKCgo=