1. load libraries

2. Load Seurat Object


load("AllSample_corrected_AzimuthAnnotated_L1.Robj")

All_samples_Merged

3. Data PREPERATION


alldata <- All_samples_Merged

alldata.list <- SplitObject(alldata, split.by = "orig.ident")

for (i in 1:length(alldata.list)) {
    alldata.list[[i]] <- NormalizeData(alldata.list[[i]], verbose = FALSE)
    alldata.list[[i]] <- FindVariableFeatures(alldata.list[[i]], selection.method = "vst", nfeatures = 2000,verbose = FALSE)
}

# get the variable genes from all the datasets.
hvgs_per_dataset <- lapply(alldata.list, function(x) { x@assays$RNA@var.features })

# also add in the variable genes that was selected on the whole dataset
hvgs_per_dataset$all = VariableFeatures(alldata)

temp <- unique(unlist(hvgs_per_dataset))
overlap <- sapply( hvgs_per_dataset , function(x) { temp %in% x } )
pheatmap::pheatmap(t(overlap*1),cluster_rows = F ,
                   color = c("grey90","grey20"))

hvgs_all = SelectIntegrationFeatures(alldata.list)
hvgs_per_dataset$all_ranks = hvgs_all

temp <- unique(unlist(hvgs_per_dataset))
overlap <- sapply( hvgs_per_dataset , function(x) { temp %in% x } )
pheatmap::pheatmap(t(overlap*1),cluster_rows = F ,
                   color = c("grey90","grey20"))


alldata.list <- lapply(X = alldata.list, FUN = function(x) {
    x <- ScaleData(x, features = hvgs_all, verbose = FALSE)
    x <- RunPCA(x, features = hvgs_all, verbose = FALSE)
})

4. rpca-integration

5. Harmony-integration

FeaturePlot



myfeatures <- c("CD3E", "CD4", "CD8A", "NKG7", "GNLY", "MS4A1", "CD14", "LYZ", "MS4A7", "FCGR3A", "CST3", "FCER1A")
FeaturePlot(alldata.int, reduction = "umap_harmony", dims = 1:2, features = myfeatures, ncol = 4, order = T) + NoLegend() + NoAxes() + NoGrid()


FeaturePlot(alldata.int, reduction = "umap_rpca", dims = 1:2, features = myfeatures, ncol = 4, order = T) + NoLegend() + NoAxes() + NoGrid()

6. Save the Seurat object as an Robj file


save(alldata.int, file = "Integrated_by_rpca_Harmony.Robj")
---
title: "Integration by rpca-Harmony"
author: Nasir Mahmood Abbasi
date: "2024-04-03"
output:
  html_notebook: 
    toc: true
    toc_float: true
    toc_collapsed: true
    theme: darkly
---
# 1. load libraries
```{r setup, include=FALSE}
library(Seurat)
library(SeuratObject)
library(SeuratData)
library(patchwork)
library(harmony)
library(ggplot2)
library(reticulate)
library(Azimuth)
library(dplyr)
library(Rtsne)
library(harmony)

```

# 2. Load Seurat Object 
```{r load_seurat}

load("AllSample_corrected_AzimuthAnnotated_L1.Robj")

All_samples_Merged

```


# 3. Data PREPERATION
```{r data, fig.height=6, fig.width=10}

alldata <- All_samples_Merged

alldata.list <- SplitObject(alldata, split.by = "orig.ident")

for (i in 1:length(alldata.list)) {
    alldata.list[[i]] <- NormalizeData(alldata.list[[i]], verbose = FALSE)
    alldata.list[[i]] <- FindVariableFeatures(alldata.list[[i]], selection.method = "vst", nfeatures = 2000,verbose = FALSE)
}

# get the variable genes from all the datasets.
hvgs_per_dataset <- lapply(alldata.list, function(x) { x@assays$RNA@var.features })

# also add in the variable genes that was selected on the whole dataset
hvgs_per_dataset$all = VariableFeatures(alldata)

temp <- unique(unlist(hvgs_per_dataset))
overlap <- sapply( hvgs_per_dataset , function(x) { temp %in% x } )
pheatmap::pheatmap(t(overlap*1),cluster_rows = F ,
                   color = c("grey90","grey20"))

hvgs_all = SelectIntegrationFeatures(alldata.list)
hvgs_per_dataset$all_ranks = hvgs_all

temp <- unique(unlist(hvgs_per_dataset))
overlap <- sapply( hvgs_per_dataset , function(x) { temp %in% x } )
pheatmap::pheatmap(t(overlap*1),cluster_rows = F ,
                   color = c("grey90","grey20"))


alldata.list <- lapply(X = alldata.list, FUN = function(x) {
    x <- ScaleData(x, features = hvgs_all, verbose = FALSE)
    x <- RunPCA(x, features = hvgs_all, verbose = FALSE)
})
```


# 4. rpca-integration
```{r integration-rpca, fig.height=6, fig.width=10}
# Exclude genes starting with "HLA-" or "Xist"
hvgs_all_final <- hvgs_all[!grepl("^HLA-|^Xist", hvgs_all)]

alldata.anchors <- FindIntegrationAnchors(object.list = alldata.list, dims = 1:12, reduction = "rpca", anchor.features = hvgs_all_final)

alldata.int <- IntegrateData(anchorset = alldata.anchors, dims = 1:12, new.assay.name = "rpca")

names(alldata.int@assays)

alldata.int@active.assay

DefaultAssay(alldata.int) <- "rpca"

#Run Dimensionality reduction on integrated space
alldata.int <- ScaleData(alldata.int, verbose = FALSE)
alldata.int <- RunPCA(alldata.int, features = hvgs_all, reduction.name = "pca_rpca", do.print = TRUE, pcs.print = 1:5, genes.print = 15, verbose = FALSE)
alldata.int <- RunUMAP(alldata.int, reduction = "pca_rpca", reduction.name = "umap_rpca", dims = 1:12, verbose = FALSE)
alldata.int <- RunTSNE(alldata.int, reduction = "pca_rpca", reduction.name = "tsne_rpca", dims = 1:12, verbose = FALSE)
alldata.int <- FindNeighbors(alldata.int, reduction = "pca_rpca", dims = 1:12, verbose = FALSE)
alldata.int <- FindClusters(alldata.int, resolution = 1.2, verbose = FALSE)



wrap_plots(

    
    DimPlot(alldata.int, reduction = "umap_rpca", group.by = "orig.ident")+NoAxes()+ggtitle("UMAP integrated"),
    DimPlot(alldata.int, reduction = "umap_rpca", group.by = "orig.ident", label = TRUE, label.box = TRUE, repel = TRUE)+NoAxes()+ggtitle("UMAP integrated"),
    
    DimPlot(alldata.int, reduction = "umap_rpca", group.by = "rpca_snn_res.1.2")+NoAxes()+ggtitle("UMAP integrated"),
    DimPlot(alldata.int, reduction = "umap_rpca", group.by = "rpca_snn_res.1.2", label = TRUE, label.box = TRUE, repel = TRUE)+ggtitle("UMAP integrated"),
    
    DimPlot(alldata.int, reduction = "umap_rpca", group.by = "predicted.celltype.l2")+ggtitle("UMAP integrated"),
    DimPlot(alldata.int, reduction = "umap_rpca", group.by = "predicted.celltype.l2", label = TRUE, label.box = TRUE, repel = TRUE)+ggtitle("UMAP integrated"),
    
    ncol = 2) + plot_layout(guides = "collect")

```










    


# 5. Harmony-integration
```{r Harmony_integration, fig.height=6, fig.width=10}

alldata.int@active.assay = "RNA"

VariableFeatures(alldata.int) = hvgs_all_final
alldata.int = ScaleData(alldata.int, verbose = TRUE)
alldata.int = RunPCA(alldata.int, reduction.name = "pca_harmony")


alldata.int <- RunHarmony(
  alldata.int,
  group.by.vars = "orig.ident",
  reduction.use = "pca_harmony",
  dims.use = 1:12,
  assay.use = "RNA")


alldata.int <- RunUMAP(alldata.int, dims = 1:12, reduction = "harmony", reduction.name = "umap_harmony")
alldata.int <- RunTSNE(alldata.int, dims = 1:12, reduction = "harmony", reduction.name = "tsne_harmony")
alldata.int <- FindNeighbors(alldata.int, reduction = "pca_harmony", dims = 1:12, verbose = FALSE)
alldata.int <- FindClusters(alldata.int, resolution = 1.2, verbose = FALSE)


 
 
 wrap_plots(

    
    DimPlot(alldata.int, reduction = "umap_harmony", group.by = "orig.ident")+NoAxes()+ggtitle("UMAP integrated"),
    DimPlot(alldata.int, reduction = "umap_harmony", group.by = "orig.ident", label = TRUE, label.box = TRUE, repel = TRUE)+NoAxes()+ggtitle("UMAP integrated"),
    
    DimPlot(alldata.int, reduction = "umap_harmony", group.by = "RNA_snn_res.1.2")+NoAxes()+ggtitle("UMAP integrated"),
    DimPlot(alldata.int, reduction = "umap_harmony", group.by = "RNA_snn_res.1.2", label = TRUE, label.box = TRUE, repel = TRUE)+NoAxes()+ggtitle("UMAP integrated"),
    
    DimPlot(alldata.int, reduction = "umap_harmony", group.by = "predicted.celltype.l2")+NoAxes()+ggtitle("UMAP integrated"),
     DimPlot(alldata.int, reduction = "umap_harmony", group.by = "predicted.celltype.l2", label = TRUE, label.box = TRUE, repel = TRUE)+NoAxes()+ggtitle("UMAP integrated"),
    ncol = 2) + plot_layout(guides = "collect")



```

# FeaturePlot
```{r featureplot-harmony, fig.height=6, fig.width=10}


myfeatures <- c("CD3E", "CD4", "CD8A", "NKG7", "GNLY", "MS4A1", "CD14", "LYZ", "MS4A7", "FCGR3A", "CST3", "FCER1A")
FeaturePlot(alldata.int, reduction = "umap_harmony", dims = 1:2, features = myfeatures, ncol = 4, order = T) + NoLegend() + NoAxes() + NoGrid()

FeaturePlot(alldata.int, reduction = "umap_rpca", dims = 1:2, features = myfeatures, ncol = 4, order = T) + NoLegend() + NoAxes() + NoGrid()
```


# 6. Save the Seurat object as an Robj file
```{r saveROBJ}

save(alldata.int, file = "Integrated_by_rpca_Harmony.Robj")


```




