1. load libraries

2. Load Seurat Object


#Load Seurat Object merged from cell lines and a control(PBMC) after filtration
load("../../../0-IMP-OBJECTS/All_Samples_Merged_with_10x_Azitmuth_Annotated_SCT_HPC_without_harmony_integration.robj")

All_samples_Merged

3. Data PREPARATION and Harmony Integration-1


library(harmony)

All_samples_Merged <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "cell_line",  # Replace with the metadata column specifying batch or cell line
  dims.use = 1:22,  # Use the same dimensions as PCA
  assay.use = "SCT",
plot_convergence = TRUE
  )

# Run UMAP on the new Harmony reduction
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:22, reduction.name = "umap.harmony")

# Find neighbors and clusters using the Harmony reduction
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:22)
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5)

# Visualize results
p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line") + 
  ggtitle("Harmony Integration - By Cell Line")
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters") + 
  ggtitle("Harmony Integration - By Clusters")

# Compare with original UMAP
p3 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line") + 
  ggtitle("Original Integration - By Cell Line")
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters") + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p1 + p2)
print(p3 + p4)

Harmony Visualization-1


# Visualize results
p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")

# Compare with original UMAP
p3 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p1 + p2)
print(p3 + p4)

4. Data PREPARATION and Harmony Integration-2


library(harmony)

All_samples_Merged <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "cell_line",  # Replace with the metadata column specifying batch or cell line
  dims.use = 1:22,  # Use the same dimensions as PCA
  assay.use = "SCT",
  Theta = 0.5,
  lambda =0.5,
  plot_convergence = TRUE
  )

# Run UMAP on the new Harmony reduction
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:22, reduction.name = "umap.harmony")

# Find neighbors and clusters using the Harmony reduction
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:22)
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5)

Harmony Visualization-2


# Visualize results
p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")

# Compare with original UMAP
p3 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p1 + p2)
print(p3 + p4)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "predicted.celltype.l2",label = T, label.box = T) + 
  ggtitle("Harmony Integration - Annotations")

5. Data PREPARATION and Harmony Integration-3


# Create a new metadata column for grouping
All_samples_Merged$sample_group <- case_when(
  grepl("^L", All_samples_Merged$cell_line) ~ "Cell_Line",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"
)

# Create the cell line grouping correctly
All_samples_Merged$cell_line_group <- case_when(
  grepl("^L[1-2]", All_samples_Merged$cell_line) ~ "P1",
  grepl("^L[3-4]", All_samples_Merged$cell_line) ~ "P2",
  grepl("^L[5-7]", All_samples_Merged$cell_line) ~ "P3",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"  # This catches any unexpected values
)

library(harmony)

All_samples_Merged <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "sample_group",
  dims.use = 1:22,  # Increased to capture more variation
  theta = c(0.5),  # Adjust these values as needed
  plot_convergence = TRUE
)

# Run UMAP on the new Harmony reduction
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:22, reduction.name = "umap.harmony")

# Find neighbors and clusters using the Harmony reduction
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:22)
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5)

Harmony Visualization-3



p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group", label = T, label.box = T)
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
p3 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)

p1 + p2 + p3

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)



# Compare with original UMAP
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
p5 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p4 + p5)

DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Visualize results
p6 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
p7 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
# Print the plots
print(p6 + p7)

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "predicted.celltype.l2",label = T, label.box = T) + 
  ggtitle("Harmony Integration - Annotations")

6. Data PREPARATION and Harmony Integration-4


# Create a new metadata column for grouping
All_samples_Merged$sample_group <- case_when(
  grepl("^L", All_samples_Merged$cell_line) ~ "Cell_Line",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"
)

# Create the cell line grouping correctly
All_samples_Merged$cell_line_group <- case_when(
  grepl("^L[1-2]", All_samples_Merged$cell_line) ~ "P1",
  grepl("^L[3-4]", All_samples_Merged$cell_line) ~ "P2",
  grepl("^L[5-7]", All_samples_Merged$cell_line) ~ "P3",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"  # This catches any unexpected values
)

library(harmony)

All_samples_Merged <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "cell_line_group",
  dims.use = 1:22,  # Increased to capture more variation
  theta = c(0.5),  # Adjust these values as needed
  plot_convergence = TRUE
)

# Run UMAP on the new Harmony reduction
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:22, reduction.name = "umap.harmony")

# Find neighbors and clusters using the Harmony reduction
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:22)
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5)

Harmony Visualization-4



p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group", label = T, label.box = T)
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
p3 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)

p1 + p2 + p3

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)



# Compare with original UMAP
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
p5 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p4 + p5)

DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Visualize results
p6 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
p7 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
# Print the plots
print(p6 + p7)

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "predicted.celltype.l2",label = T, label.box = T) + 
  ggtitle("Harmony Integration - Annotations")

7. Data PREPARATION and Harmony Integration-5


# Create a new metadata column for grouping
All_samples_Merged$sample_group <- case_when(
  grepl("^L", All_samples_Merged$cell_line) ~ "Cell_Line",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"
)

# Create the cell line grouping correctly
All_samples_Merged$cell_line_group <- case_when(
  grepl("^L[1-2]", All_samples_Merged$cell_line) ~ "P1",
  grepl("^L[3-4]", All_samples_Merged$cell_line) ~ "P2",
  grepl("^L[5-7]", All_samples_Merged$cell_line) ~ "P3",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"  # This catches any unexpected values
)

library(harmony)

All_samples_Merged <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "cell_line",
  dims.use = 1:22,  # Increased to capture more variation
  theta = c(0.5),  # Adjust these values as needed
  plot_convergence = TRUE
)

# Run UMAP on the new Harmony reduction
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:22, reduction.name = "umap.harmony")

# Find neighbors and clusters using the Harmony reduction
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:22)
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5)

Harmony Visualization-5



p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group", label = T, label.box = T)
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
p3 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)

p1 + p2 + p3

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)



# Compare with original UMAP
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
p5 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p4 + p5)

DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Visualize results
p6 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
p7 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
# Print the plots
print(p6 + p7)

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "predicted.celltype.l2",label = T, label.box = T) + 
  ggtitle("Harmony Integration - Annotations")

Marker Gene Visualization



# Set marker genes specific to requested immune cell types
myfeatures <- c("CD19", "CD79A", "MS4A1", # B cells
                "CD14", "LYZ", "FCGR3A", # Monocytes
                "CSF1R", "CD68", # Macrophages
                "NKG7", "GNLY", "KIR3DL1", # NK cells
                "MKI67", # Proliferating NK cells
                "CD34", "KIT", # HSPCs
                "CD3E", "CCR7", # T cells
                "SELL", "CD45RO", # Tnaive, Tcm
                "CD44", "CD45RA") # Tem, Temra
# Visualize marker genes for Harmony
FeaturePlot(All_samples_Merged, features = myfeatures, reduction = "umap.harmony", ncol = 4) + 
  ggtitle("Marker Gene Expression - Harmony Integration") +
  NoLegend()

6. Save the Seurat object as an Robj file


#save(All_samples_Merged, file = "integrated_All_samples_Merged_with_PBMC10x.Robj")
---
title: "Multiple Harmony integrations of PBMC10x-part3"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  #rmdformats::readthedown
  html_notebook:
    toc: true
    toc_float: true
    toc_collapsed: true
---

# 1. load libraries
```{r setup, include=FALSE}
library(Seurat)
library(SeuratWrappers)
library(SeuratObject)
library(SeuratData)
library(patchwork)
library(harmony)
library(ggplot2)
library(reticulate)
library(Azimuth)
library(dplyr)
library(Rtsne)
library(harmony)

options(future.globals.maxSize = 1e9)

```




# 2. Load Seurat Object 
```{r load_seurat}

#Load Seurat Object merged from cell lines and a control(PBMC) after filtration
load("../../../0-IMP-OBJECTS/All_Samples_Merged_with_10x_Azitmuth_Annotated_SCT_HPC_without_harmony_integration.robj")

All_samples_Merged

```

# 3. Data PREPARATION and Harmony Integration-1
```{r data, fig.height=8, fig.width=12}

library(harmony)

All_samples_Merged <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "cell_line",  # Replace with the metadata column specifying batch or cell line
  dims.use = 1:22,  # Use the same dimensions as PCA
  assay.use = "SCT",
plot_convergence = TRUE
  )

# Run UMAP on the new Harmony reduction
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:22, reduction.name = "umap.harmony")

# Find neighbors and clusters using the Harmony reduction
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:22)
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5)

# Visualize results
p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line") + 
  ggtitle("Harmony Integration - By Cell Line")
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters") + 
  ggtitle("Harmony Integration - By Clusters")

# Compare with original UMAP
p3 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line") + 
  ggtitle("Original Integration - By Cell Line")
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters") + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p1 + p2)
print(p3 + p4)


```

##  Harmony Visualization-1
```{r harmony-visualization1, fig.height=8, fig.width=12}

# Visualize results
p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")

# Compare with original UMAP
p3 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p1 + p2)
print(p3 + p4)

```


# 4. Data PREPARATION and Harmony Integration-2
```{r data2, fig.height=8, fig.width=12}

library(harmony)

All_samples_Merged <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "cell_line",  # Replace with the metadata column specifying batch or cell line
  dims.use = 1:22,  # Use the same dimensions as PCA
  assay.use = "SCT",
  Theta = 0.5,
  lambda =0.5,
  plot_convergence = TRUE
  )

# Run UMAP on the new Harmony reduction
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:22, reduction.name = "umap.harmony")

# Find neighbors and clusters using the Harmony reduction
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:22)
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5)
```

##  Harmony Visualization-2
```{r harmony-visualization2, fig.height=8, fig.width=12}

# Visualize results
p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")

# Compare with original UMAP
p3 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p1 + p2)
print(p3 + p4)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "predicted.celltype.l2",label = T, label.box = T) + 
  ggtitle("Harmony Integration - Annotations")

```

# 5. Data PREPARATION and Harmony Integration-3
```{r data3, fig.height=8, fig.width=12}

# Create a new metadata column for grouping
All_samples_Merged$sample_group <- case_when(
  grepl("^L", All_samples_Merged$cell_line) ~ "Cell_Line",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"
)

# Create the cell line grouping correctly
All_samples_Merged$cell_line_group <- case_when(
  grepl("^L[1-2]", All_samples_Merged$cell_line) ~ "P1",
  grepl("^L[3-4]", All_samples_Merged$cell_line) ~ "P2",
  grepl("^L[5-7]", All_samples_Merged$cell_line) ~ "P3",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"  # This catches any unexpected values
)

library(harmony)

All_samples_Merged <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "sample_group",
  dims.use = 1:22,  # Increased to capture more variation
  theta = c(0.5),  # Adjust these values as needed
  plot_convergence = TRUE
)

# Run UMAP on the new Harmony reduction
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:22, reduction.name = "umap.harmony")

# Find neighbors and clusters using the Harmony reduction
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:22)
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5)

```

##  Harmony Visualization-3
```{r harmony-visualization3, fig.height=8, fig.width=12}


p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group", label = T, label.box = T)
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
p3 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)

p1 + p2 + p3

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)



# Compare with original UMAP
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
p5 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p4 + p5)

DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Visualize results
p6 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
p7 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
# Print the plots
print(p6 + p7)

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "predicted.celltype.l2",label = T, label.box = T) + 
  ggtitle("Harmony Integration - Annotations")

```
# 6. Data PREPARATION and Harmony Integration-4
```{r data4, fig.height=8, fig.width=12}

# Create a new metadata column for grouping
All_samples_Merged$sample_group <- case_when(
  grepl("^L", All_samples_Merged$cell_line) ~ "Cell_Line",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"
)

# Create the cell line grouping correctly
All_samples_Merged$cell_line_group <- case_when(
  grepl("^L[1-2]", All_samples_Merged$cell_line) ~ "P1",
  grepl("^L[3-4]", All_samples_Merged$cell_line) ~ "P2",
  grepl("^L[5-7]", All_samples_Merged$cell_line) ~ "P3",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"  # This catches any unexpected values
)

library(harmony)

All_samples_Merged <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "cell_line_group",
  dims.use = 1:22,  # Increased to capture more variation
  theta = c(0.5),  # Adjust these values as needed
  plot_convergence = TRUE
)

# Run UMAP on the new Harmony reduction
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:22, reduction.name = "umap.harmony")

# Find neighbors and clusters using the Harmony reduction
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:22)
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5)

```

##  Harmony Visualization-4
```{r harmony-visualization4, fig.height=8, fig.width=12}


p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group", label = T, label.box = T)
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
p3 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)

p1 + p2 + p3

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)



# Compare with original UMAP
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
p5 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p4 + p5)

DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Visualize results
p6 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
p7 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
# Print the plots
print(p6 + p7)

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "predicted.celltype.l2",label = T, label.box = T) + 
  ggtitle("Harmony Integration - Annotations")

```


# 7. Data PREPARATION and Harmony Integration-5
```{r data5, fig.height=8, fig.width=12}

# Create a new metadata column for grouping
All_samples_Merged$sample_group <- case_when(
  grepl("^L", All_samples_Merged$cell_line) ~ "Cell_Line",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"
)

# Create the cell line grouping correctly
All_samples_Merged$cell_line_group <- case_when(
  grepl("^L[1-2]", All_samples_Merged$cell_line) ~ "P1",
  grepl("^L[3-4]", All_samples_Merged$cell_line) ~ "P2",
  grepl("^L[5-7]", All_samples_Merged$cell_line) ~ "P3",
  All_samples_Merged$cell_line == "PBMC" ~ "PBMC",
  All_samples_Merged$cell_line == "PBMC_10x" ~ "PBMC_10x",
  TRUE ~ "Other"  # This catches any unexpected values
)

library(harmony)

All_samples_Merged <- RunHarmony(
  object = All_samples_Merged,
  group.by.vars = "cell_line",
  dims.use = 1:22,  # Increased to capture more variation
  theta = c(0.5),  # Adjust these values as needed
  plot_convergence = TRUE
)

# Run UMAP on the new Harmony reduction
All_samples_Merged <- RunUMAP(All_samples_Merged, reduction = "harmony", dims = 1:22, reduction.name = "umap.harmony")

# Find neighbors and clusters using the Harmony reduction
All_samples_Merged <- FindNeighbors(All_samples_Merged, reduction = "harmony", dims = 1:22)
All_samples_Merged <- FindClusters(All_samples_Merged, resolution = 0.5)

```

##  Harmony Visualization-5
```{r harmony-visualization5, fig.height=8, fig.width=12}


p1 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group", label = T, label.box = T)
p2 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
p3 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)

p1 + p2 + p3

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "sample_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line_group",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)



# Compare with original UMAP
p4 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
p5 <- DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Print the plots
print(p4 + p5)

DimPlot(All_samples_Merged, reduction = "umap", group.by = "cell_line",label = T, label.box = T) + 
  ggtitle("Original Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Original Integration - By Clusters")

# Visualize results
p6 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
p7 <- DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
# Print the plots
print(p6 + p7)

DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line", label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Cell Line")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "seurat_clusters",label = T, label.box = T) + 
  ggtitle("Harmony Integration - By Clusters")
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "predicted.celltype.l2",label = T, label.box = T) + 
  ggtitle("Harmony Integration - Annotations")

```

##  Marker Gene Visualization
```{r featureplot-harmony, fig.height=14, fig.width=18}


# Set marker genes specific to requested immune cell types
myfeatures <- c("CD19", "CD79A", "MS4A1", # B cells
                "CD14", "LYZ", "FCGR3A", # Monocytes
                "CSF1R", "CD68", # Macrophages
                "NKG7", "GNLY", "KIR3DL1", # NK cells
                "MKI67", # Proliferating NK cells
                "CD34", "KIT", # HSPCs
                "CD3E", "CCR7", # T cells
                "SELL", "CD45RO", # Tnaive, Tcm
                "CD44", "CD45RA") # Tem, Temra
# Visualize marker genes for Harmony
FeaturePlot(All_samples_Merged, features = myfeatures, reduction = "umap.harmony", ncol = 4) + 
  ggtitle("Marker Gene Expression - Harmony Integration") +
  NoLegend()
```

# 6. Save the Seurat object as an Robj file
```{r saveROBJ}

#save(All_samples_Merged, file = "integrated_All_samples_Merged_with_PBMC10x.Robj")

```




