1. load libraries

#Differential Expression Analysis

2. load seurat object

#Load Seurat Object L7
load("../0-IMP-OBJECTS/Harmony_integrated_All_samples_Merged_with_PBMC10x_with_harmony_clustering.Robj")


All_samples_Merged
An object of class Seurat 
64169 features across 59355 samples within 6 assays 
Active assay: SCT (27417 features, 3000 variable features)
 3 layers present: counts, data, scale.data
 5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
 6 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap, harmony, umap.harmony
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "Harmony_snn_res.0.9",label = T, label.box = T)

3. silhouette Score Test

library(Seurat)
library(cluster)
library(ggplot2)

# Assuming `All_samples_Merged` is already created and contains both PCA and Harmony embeddings
# Define the range of PCs used in SCTransform
pc_range <- 1:13

# Extract PCA and Harmony embeddings
pca_embedding <- Embeddings(All_samples_Merged, reduction = "pca")[, pc_range]
harmony_embedding <- Embeddings(All_samples_Merged, reduction = "harmony")[, pc_range]

# Retrieve predicted cell type labels and batch labels
celltype_labels <- All_samples_Merged$predicted.celltype.l2  # Using predicted.celltype.l2
batch_labels <- All_samples_Merged$orig.ident        # Adjust based on actual batch/condition column

# Ensure labels are numeric (for silhouette function)
celltype_labels <- as.numeric(as.factor(celltype_labels))
batch_labels <- as.numeric(as.factor(batch_labels))

# Compute silhouette scores for PCA embeddings
silhouette_pca_celltypes <- silhouette(celltype_labels, dist(pca_embedding))
silhouette_pca_batches <- silhouette(batch_labels, dist(pca_embedding))

# Compute silhouette scores for Harmony embeddings
silhouette_harmony_celltypes <- silhouette(celltype_labels, dist(harmony_embedding))
silhouette_harmony_batches <- silhouette(batch_labels, dist(harmony_embedding))

# Convert silhouette objects to data frames for visualization
silhouette_df <- data.frame(
  method = rep(c("PCA", "Harmony"), each = length(celltype_labels) * 2),
  group = c(rep("Cell Type", length(celltype_labels)), rep("Batch", length(celltype_labels))),
  silhouette_width = c(
    silhouette_pca_celltypes[, "sil_width"], silhouette_pca_batches[, "sil_width"],
    silhouette_harmony_celltypes[, "sil_width"], silhouette_harmony_batches[, "sil_width"]
  )
)

# Plot silhouette scores for cell types and batches across PCA and Harmony
ggplot(silhouette_df, aes(x = method, y = silhouette_width, fill = group)) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Comparison of Silhouette Scores for PCA and Harmony",
    x = "Embedding Method",
    y = "Silhouette Width"
  ) +
  facet_wrap(~ group, scales = "free") +
  theme(legend.position = "none")


# Print average silhouette scores
cat("Average silhouette scores:\n")
Average silhouette scores:
cat("PCA (Cell Type): ", mean(silhouette_pca_celltypes[, "sil_width"]), "\n")
PCA (Cell Type):  -0.1546204 
cat("PCA (Batch): ", mean(silhouette_pca_batches[, "sil_width"]), "\n")
PCA (Batch):  0.3232747 
cat("Harmony (Cell Type): ", mean(silhouette_harmony_celltypes[, "sil_width"]), "\n")
Harmony (Cell Type):  -0.1344296 
cat("Harmony (Batch): ", mean(silhouette_harmony_batches[, "sil_width"]), "\n")
Harmony (Batch):  0.05910068 
LS0tCnRpdGxlOiAiU8OpemFyeSBTeW5kcm9tZSBzaWxob3VldHRlIFNjb3JlIFRlc3QtUGFydDIiCmF1dGhvcjogTmFzaXIgTWFobW9vZCBBYmJhc2kKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiAgIyBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKICAjIHdvcmRfZG9jdW1lbnQ6IGRlZmF1bHQKICAjIGh0bWxfZG9jdW1lbnQ6IGRlZmF1bHQKICAjcm1kZm9ybWF0czo6cmVhZHRoZWRvd24KICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIHRvY19jb2xsYXBzZWQ6IHRydWUKLS0tCgojIDEuIGxvYWQgbGlicmFyaWVzCmBgYHtyIHNldHVwMSwgaW5jbHVkZT1GQUxTRX0KIyMgaHR0cHM6Ly9lbmJsYWNhci5naXRodWIuaW8vU0NwdWJyLWJvb2stdjEvMDQtRmVhdHVyZVBsb3RzLmh0bWwKCgojIExvYWQgcmVxdWlyZWQgbGlicmFyaWVzCmxpYnJhcnkoU2V1cmF0KQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHNjQ3VzdG9taXplKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkocGhlYXRtYXApCmxpYnJhcnkoU0NwdWJyKQpsaWJyYXJ5KFNldXJhdEludGVncmF0ZSkKCmBgYAojRGlmZmVyZW50aWFsIEV4cHJlc3Npb24gQW5hbHlzaXMKCiMgMi4gbG9hZCBzZXVyYXQgb2JqZWN0CmBgYHtyIGxvYWRfc2V1cmF0fQojTG9hZCBTZXVyYXQgT2JqZWN0IEw3CmxvYWQoIi4uLzAtSU1QLU9CSkVDVFMvSGFybW9ueV9pbnRlZ3JhdGVkX0FsbF9zYW1wbGVzX01lcmdlZF93aXRoX1BCTUMxMHhfd2l0aF9oYXJtb255X2NsdXN0ZXJpbmcuUm9iaiIpCgoKQWxsX3NhbXBsZXNfTWVyZ2VkCgpEaW1QbG90KEFsbF9zYW1wbGVzX01lcmdlZCwgcmVkdWN0aW9uID0gInVtYXAuaGFybW9ueSIsIGdyb3VwLmJ5ID0gImNlbGxfbGluZSIsbGFiZWwgPSBULCBsYWJlbC5ib3ggPSBUKQpEaW1QbG90KEFsbF9zYW1wbGVzX01lcmdlZCwgcmVkdWN0aW9uID0gInVtYXAuaGFybW9ueSIsIGdyb3VwLmJ5ID0gIkhhcm1vbnlfc25uX3Jlcy4wLjkiLGxhYmVsID0gVCwgbGFiZWwuYm94ID0gVCkKCmBgYAoKCgojIDMuIHNpbGhvdWV0dGUgU2NvcmUgVGVzdApgYGB7ciBzaWxob3VldHRlLCBmaWcuaGVpZ2h0PTgsIGZpZy53aWR0aD0xMn0KbGlicmFyeShTZXVyYXQpCmxpYnJhcnkoY2x1c3RlcikKbGlicmFyeShnZ3Bsb3QyKQoKIyBBc3N1bWluZyBgQWxsX3NhbXBsZXNfTWVyZ2VkYCBpcyBhbHJlYWR5IGNyZWF0ZWQgYW5kIGNvbnRhaW5zIGJvdGggUENBIGFuZCBIYXJtb255IGVtYmVkZGluZ3MKIyBEZWZpbmUgdGhlIHJhbmdlIG9mIFBDcyB1c2VkIGluIFNDVHJhbnNmb3JtCnBjX3JhbmdlIDwtIDE6MTMKCiMgRXh0cmFjdCBQQ0EgYW5kIEhhcm1vbnkgZW1iZWRkaW5ncwpwY2FfZW1iZWRkaW5nIDwtIEVtYmVkZGluZ3MoQWxsX3NhbXBsZXNfTWVyZ2VkLCByZWR1Y3Rpb24gPSAicGNhIilbLCBwY19yYW5nZV0KaGFybW9ueV9lbWJlZGRpbmcgPC0gRW1iZWRkaW5ncyhBbGxfc2FtcGxlc19NZXJnZWQsIHJlZHVjdGlvbiA9ICJoYXJtb255IilbLCBwY19yYW5nZV0KCiMgUmV0cmlldmUgcHJlZGljdGVkIGNlbGwgdHlwZSBsYWJlbHMgYW5kIGJhdGNoIGxhYmVscwpjZWxsdHlwZV9sYWJlbHMgPC0gQWxsX3NhbXBsZXNfTWVyZ2VkJHByZWRpY3RlZC5jZWxsdHlwZS5sMiAgIyBVc2luZyBwcmVkaWN0ZWQuY2VsbHR5cGUubDIKYmF0Y2hfbGFiZWxzIDwtIEFsbF9zYW1wbGVzX01lcmdlZCRvcmlnLmlkZW50ICAgICAgICAjIEFkanVzdCBiYXNlZCBvbiBhY3R1YWwgYmF0Y2gvY29uZGl0aW9uIGNvbHVtbgoKIyBFbnN1cmUgbGFiZWxzIGFyZSBudW1lcmljIChmb3Igc2lsaG91ZXR0ZSBmdW5jdGlvbikKY2VsbHR5cGVfbGFiZWxzIDwtIGFzLm51bWVyaWMoYXMuZmFjdG9yKGNlbGx0eXBlX2xhYmVscykpCmJhdGNoX2xhYmVscyA8LSBhcy5udW1lcmljKGFzLmZhY3RvcihiYXRjaF9sYWJlbHMpKQoKIyBDb21wdXRlIHNpbGhvdWV0dGUgc2NvcmVzIGZvciBQQ0EgZW1iZWRkaW5ncwpzaWxob3VldHRlX3BjYV9jZWxsdHlwZXMgPC0gc2lsaG91ZXR0ZShjZWxsdHlwZV9sYWJlbHMsIGRpc3QocGNhX2VtYmVkZGluZykpCnNpbGhvdWV0dGVfcGNhX2JhdGNoZXMgPC0gc2lsaG91ZXR0ZShiYXRjaF9sYWJlbHMsIGRpc3QocGNhX2VtYmVkZGluZykpCgojIENvbXB1dGUgc2lsaG91ZXR0ZSBzY29yZXMgZm9yIEhhcm1vbnkgZW1iZWRkaW5ncwpzaWxob3VldHRlX2hhcm1vbnlfY2VsbHR5cGVzIDwtIHNpbGhvdWV0dGUoY2VsbHR5cGVfbGFiZWxzLCBkaXN0KGhhcm1vbnlfZW1iZWRkaW5nKSkKc2lsaG91ZXR0ZV9oYXJtb255X2JhdGNoZXMgPC0gc2lsaG91ZXR0ZShiYXRjaF9sYWJlbHMsIGRpc3QoaGFybW9ueV9lbWJlZGRpbmcpKQoKIyBDb252ZXJ0IHNpbGhvdWV0dGUgb2JqZWN0cyB0byBkYXRhIGZyYW1lcyBmb3IgdmlzdWFsaXphdGlvbgpzaWxob3VldHRlX2RmIDwtIGRhdGEuZnJhbWUoCiAgbWV0aG9kID0gcmVwKGMoIlBDQSIsICJIYXJtb255IiksIGVhY2ggPSBsZW5ndGgoY2VsbHR5cGVfbGFiZWxzKSAqIDIpLAogIGdyb3VwID0gYyhyZXAoIkNlbGwgVHlwZSIsIGxlbmd0aChjZWxsdHlwZV9sYWJlbHMpKSwgcmVwKCJCYXRjaCIsIGxlbmd0aChjZWxsdHlwZV9sYWJlbHMpKSksCiAgc2lsaG91ZXR0ZV93aWR0aCA9IGMoCiAgICBzaWxob3VldHRlX3BjYV9jZWxsdHlwZXNbLCAic2lsX3dpZHRoIl0sIHNpbGhvdWV0dGVfcGNhX2JhdGNoZXNbLCAic2lsX3dpZHRoIl0sCiAgICBzaWxob3VldHRlX2hhcm1vbnlfY2VsbHR5cGVzWywgInNpbF93aWR0aCJdLCBzaWxob3VldHRlX2hhcm1vbnlfYmF0Y2hlc1ssICJzaWxfd2lkdGgiXQogICkKKQoKIyBQbG90IHNpbGhvdWV0dGUgc2NvcmVzIGZvciBjZWxsIHR5cGVzIGFuZCBiYXRjaGVzIGFjcm9zcyBQQ0EgYW5kIEhhcm1vbnkKZ2dwbG90KHNpbGhvdWV0dGVfZGYsIGFlcyh4ID0gbWV0aG9kLCB5ID0gc2lsaG91ZXR0ZV93aWR0aCwgZmlsbCA9IGdyb3VwKSkgKwogIGdlb21fYm94cGxvdCgpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIGxhYnMoCiAgICB0aXRsZSA9ICJDb21wYXJpc29uIG9mIFNpbGhvdWV0dGUgU2NvcmVzIGZvciBQQ0EgYW5kIEhhcm1vbnkiLAogICAgeCA9ICJFbWJlZGRpbmcgTWV0aG9kIiwKICAgIHkgPSAiU2lsaG91ZXR0ZSBXaWR0aCIKICApICsKICBmYWNldF93cmFwKH4gZ3JvdXAsIHNjYWxlcyA9ICJmcmVlIikgKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIikKCiMgUHJpbnQgYXZlcmFnZSBzaWxob3VldHRlIHNjb3JlcwpjYXQoIkF2ZXJhZ2Ugc2lsaG91ZXR0ZSBzY29yZXM6XG4iKQpjYXQoIlBDQSAoQ2VsbCBUeXBlKTogIiwgbWVhbihzaWxob3VldHRlX3BjYV9jZWxsdHlwZXNbLCAic2lsX3dpZHRoIl0pLCAiXG4iKQpjYXQoIlBDQSAoQmF0Y2gpOiAiLCBtZWFuKHNpbGhvdWV0dGVfcGNhX2JhdGNoZXNbLCAic2lsX3dpZHRoIl0pLCAiXG4iKQpjYXQoIkhhcm1vbnkgKENlbGwgVHlwZSk6ICIsIG1lYW4oc2lsaG91ZXR0ZV9oYXJtb255X2NlbGx0eXBlc1ssICJzaWxfd2lkdGgiXSksICJcbiIpCmNhdCgiSGFybW9ueSAoQmF0Y2gpOiAiLCBtZWFuKHNpbGhvdWV0dGVfaGFybW9ueV9iYXRjaGVzWywgInNpbF93aWR0aCJdKSwgIlxuIikKCmBgYAo=