1. load libraries
2. Load Seurat Object
#Load Seurat Object merged from cell lines and a control after filtration
load("../0-robj/5-Harmony_Integrated_All_samples_Merged_CD4Tcells_final_Resolution_Selected_0.8_ADT_Normalized_cleaned_mt.robj")
3. Visulization of Harmony integrated Object
DefaultAssay(All_samples_Merged) <- "SCT"
DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters", label = TRUE, label.box = T)

DimPlot(All_samples_Merged, reduction = "umap", group.by = "orig.ident", label = TRUE, label.box = T)

DimPlot(All_samples_Merged, group.by = "predicted.celltype.l2", label = F)

3. DE Analysis Using RNA Assay (Log-Normalized Counts)
# Set the default assay to RNA
DefaultAssay(All_samples_Merged) <- "RNA"
# Perform LogNormalization on the RNA assay
All_samples_Merged <- NormalizeData(All_samples_Merged, normalization.method = "LogNormalize", scale.factor = 10000)
Normalizing layer: counts
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# Set identity class to cell line
Idents(All_samples_Merged) <- "cell_line"
# Define malignant and normal cell lines
malignant_cell_line <- c("L1", "L2", "L3", "L4", "L5", "L6", "L7")
normal_cell_line <- c("PBMC", "PBMC_10x")
# Perform DE analysis using RNA assay
DE_RNA <- FindMarkers(
object = All_samples_Merged,
ident.1 = malignant_cell_line,
ident.2 = normal_cell_line,
assay = "RNA", # Use RNA-normalized counts
min.pct = 0,
logfc.threshold = 0
)
# Convert to data frame and add gene names
DE_RNA <- as.data.frame(DE_RNA)
DE_RNA$gene <- rownames(DE_RNA)
# Reorder columns
DE_RNA <- DE_RNA[, c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")]
# Save results
write.csv(DE_RNA, "Malignant_CD4Tcells_vs_Normal_CD4Tcells_RNA_Assay.csv", row.names = FALSE)
5. DE Analysis Visualize Results of RNA vs SCT
comparison_avg_log2FC_RNA
library(ggplot2)
common_genes <- intersect(DE_RNA$gene, DE_SCT$gene)
RNA_values <- DE_RNA[DE_RNA$gene %in% common_genes, ]
SCT_values <- DE_SCT[DE_SCT$gene %in% common_genes, ]
comparison_df <- merge(RNA_values, SCT_values, by = "gene", suffixes = c("_RNA", "_SCT"))
ggplot(comparison_df, aes(x = avg_log2FC_RNA, y = avg_log2FC_SCT)) +
geom_point() +
geom_abline(slope = 1, intercept = 0, color = "red") +
theme_minimal() +
labs(title = "Comparison of DE Results: RNA vs SCT", x = "Log2FC (RNA)", y = "Log2FC (SCT)")

6. DE Analysis Visualize Results of RNA vs SCT
comparison_p_val_adj
library(ggplot2)
# Find common genes present in both DE results
common_genes <- intersect(DE_RNA$gene, DE_SCT$gene)
# Extract the values for the common genes
RNA_values <- DE_RNA[DE_RNA$gene %in% common_genes, ]
SCT_values <- DE_SCT[DE_SCT$gene %in% common_genes, ]
# Merge both datasets on "gene" column
comparison_df <- merge(RNA_values, SCT_values, by = "gene", suffixes = c("_RNA", "_SCT"))
# Plot adjusted p-values (log-transformed for better visualization)
ggplot(comparison_df, aes(x = -log10(p_val_adj_RNA), y = -log10(p_val_adj_SCT))) +
geom_point(alpha = 0.5) + # Transparency for better visibility
geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed") + # Diagonal reference line
theme_minimal() +
labs(
title = "Comparison of Adjusted p-values: RNA vs SCT",
x = "-log10 Adjusted p-value (RNA)",
y = "-log10 Adjusted p-value (SCT)"
) +
theme(
plot.title = element_text(hjust = 0.5), # Center the title
axis.text = element_text(size = 12),
axis.title = element_text(size = 14)
)

NA
NA
Why Use MAST? Handles zero inflation (dropouts) in
single-cell RNA-seq data. Accounts for technical and biological
variation. Works well when adjusting for covariates (Patient_origin
& cell_line).
7. DE Analysis Using RNA Assay (Log-Normalized Counts) + MAST
# # Load required package for MAST
# library(MAST)
#
# # Set the default assay to RNA
# DefaultAssay(All_samples_Merged) <- "RNA"
#
# # Perform LogNormalization on the RNA assay
# All_samples_Merged <- NormalizeData(All_samples_Merged, normalization.method = "LogNormalize", scale.factor = 10000)
DefaultAssay(All_samples_Merged) <- "RNA"
# Set identity class to cell line
Idents(All_samples_Merged) <- "cell_line"
# Define malignant and normal cell lines
malignant_cell_line_L1 <- c("L1")
normal_CD4Tcells <- c("PBMC", "PBMC_10x")
# Perform DE analysis using RNA assay with MAST and covariates
DE_RNA_MAST <- FindMarkers(
object = All_samples_Merged,
ident.1 = malignant_cell_line_L1,
ident.2 = normal_CD4Tcells,
assay = "RNA", # Use RNA-normalized counts
test.use = "MAST", # Use MAST test
min.pct = 0,
logfc.threshold = 0,
latent.vars = c("Patient_origin", "cell_line") # Adjust for patient and cell line
)
# Convert to data frame and add gene names
DE_RNA_MAST <- as.data.frame(DE_RNA_MAST)
DE_RNA_MAST$gene <- rownames(DE_RNA_MAST)
# Reorder columns
DE_RNA_MAST <- DE_RNA_MAST[, c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")]
# Save results
write.csv(DE_RNA_MAST, "Malignant_CD4Tcells_L1_vs_Normal_CD4Tcells_RNA_MAST.csv", row.names = FALSE)
9. DE Analysis Visualize Results of RNA vs SCT
comparison_avg_log2FC_RNA
# Load required libraries
library(ggplot2)
library(ggrepel) # For better labeling of outlier genes
# Load DE results
DE_RNA_MAST <- read.csv("Malignant_CD4Tcells_L1_vs_Normal_CD4Tcells_RNA_MAST.csv")
DE_SCT_MAST <- read.csv("Malignant_CD4Tcells_L1_vs_Normal_CD4Tcells_SCT_MAST.csv")
# Find common genes in both datasets
common_genes <- intersect(DE_RNA_MAST$gene, DE_SCT_MAST$gene)
# Filter for common genes
RNA_values <- DE_RNA_MAST[DE_RNA_MAST$gene %in% common_genes, ]
SCT_values <- DE_SCT_MAST[DE_SCT_MAST$gene %in% common_genes, ]
# Merge datasets
comparison_df <- merge(RNA_values, SCT_values, by = "gene", suffixes = c("_RNA", "_SCT"))
# ---- PLOT 1: Log2 Fold Change (Log2FC) Comparison ----
p1 <- ggplot(comparison_df, aes(x = avg_log2FC_RNA, y = avg_log2FC_SCT)) +
geom_point(alpha = 0.5) +
geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed") + # Diagonal reference line
theme_minimal() +
labs(title = "Log2 Fold Change (Log2FC) Comparison: RNA vs SCT (MAST)",
x = "Log2 Fold Change (RNA)",
y = "Log2 Fold Change (SCT)") +
theme(plot.title = element_text(hjust = 0.5))
# ---- PLOT 2: Adjusted p-value (p_val_adj) Comparison ----
p2 <- ggplot(comparison_df, aes(x = -log10(p_val_adj_RNA), y = -log10(p_val_adj_SCT))) +
geom_point(alpha = 0.5) +
geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed") + # Reference line
theme_minimal() +
labs(title = "Adjusted p-value (-log10) Comparison: RNA vs SCT (MAST)",
x = "-log10 Adjusted p-value (RNA)",
y = "-log10 Adjusted p-value (SCT)") +
theme(plot.title = element_text(hjust = 0.5))
# ---- PLOT BOTH ----
library(gridExtra)
grid.arrange(p1, p2, ncol = 2)
---
title: "DE(Malignat_vs_Normal_CD4Tcells) of Harmony Integration"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  #rmdformats::readthedown
  html_notebook:
    toc: true
    toc_float: true
    toc_collapsed: true
---


# 1. load libraries
```{r setup, include=FALSE}
library(Seurat)
library(SeuratObject)
library(SeuratData)
library(patchwork)
library(harmony)
library(ggplot2)
library(cowplot)
library(reticulate)
library(Azimuth)
library(dplyr)
library(Rtsne)
library(harmony)
library(gridExtra)
library(EnhancedVolcano)

```


# 2. Load Seurat Object 
```{r , fig.height=8, fig.width=10}

#Load Seurat Object merged from cell lines and a control after filtration
load("../0-robj/5-Harmony_Integrated_All_samples_Merged_CD4Tcells_final_Resolution_Selected_0.8_ADT_Normalized_cleaned_mt.robj")

```


# 3.  Visulization of Harmony integrated Object
```{r , fig.height=8, fig.width=12}
DefaultAssay(All_samples_Merged) <- "SCT"

DimPlot(All_samples_Merged, reduction = "umap", group.by = "seurat_clusters", label = TRUE, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap", group.by = "orig.ident", label = TRUE, label.box = T)
DimPlot(All_samples_Merged, group.by = "predicted.celltype.l2", label = F)

```

# 3. DE Analysis Using RNA Assay (Log-Normalized Counts)
```{r , fig.height=8, fig.width=12}

# Set the default assay to RNA
DefaultAssay(All_samples_Merged) <- "RNA"

# Perform LogNormalization on the RNA assay
All_samples_Merged <- NormalizeData(All_samples_Merged, normalization.method = "LogNormalize", scale.factor = 10000)

# Set identity class to cell line
Idents(All_samples_Merged) <- "cell_line"

# Define malignant and normal cell lines
malignant_cell_line <- c("L1", "L2", "L3", "L4", "L5", "L6", "L7")  
normal_cell_line <- c("PBMC", "PBMC_10x")  

# Perform DE analysis using RNA assay
DE_RNA <- FindMarkers(
  object = All_samples_Merged, 
  ident.1 = malignant_cell_line,
  ident.2 = normal_cell_line,
  assay = "RNA",   # Use RNA-normalized counts
  min.pct = 0,
  logfc.threshold = 0
)

# Convert to data frame and add gene names
DE_RNA <- as.data.frame(DE_RNA)
DE_RNA$gene <- rownames(DE_RNA)

# Reorder columns
DE_RNA <- DE_RNA[, c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")]

# Save results
write.csv(DE_RNA, "Malignant_CD4Tcells_vs_Normal_CD4Tcells_RNA_Assay.csv", row.names = FALSE)

```

# 4. DE Analysis Using SCT Assay (SCTransformed Counts)
```{r , fig.height=8, fig.width=12}

# Set the default assay to SCT
DefaultAssay(All_samples_Merged) <- "SCT"

# Set identity class to cell line
Idents(All_samples_Merged) <- "cell_line"

# Define malignant and normal cell lines
malignant_cell_line <- c("L1", "L2", "L3", "L4", "L5", "L6", "L7")  
normal_cell_line <- c("PBMC", "PBMC_10x")  

# Perform DE analysis using SCT assay
DE_SCT <- FindMarkers(
  object = All_samples_Merged, 
  ident.1 = malignant_cell_line,
  ident.2 = normal_cell_line,
  assay = "SCT",   # Use SCT-normalized counts
  min.pct = 0,
  logfc.threshold = 0
)

# Convert to data frame and add gene names
DE_SCT <- as.data.frame(DE_SCT)
DE_SCT$gene <- rownames(DE_SCT)

# Reorder columns
DE_SCT <- DE_SCT[, c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")]

# Save results
write.csv(DE_SCT, "Malignant_CD4Tcells_vs_Normal_CD4Tcells_SCT_SCTransformed.csv", row.names = FALSE)



```


# 5. DE Analysis Visualize Results of RNA vs SCT comparison_avg_log2FC_RNA
```{r , fig.height=8, fig.width=12}

library(ggplot2)
common_genes <- intersect(DE_RNA$gene, DE_SCT$gene)
RNA_values <- DE_RNA[DE_RNA$gene %in% common_genes, ]
SCT_values <- DE_SCT[DE_SCT$gene %in% common_genes, ]

comparison_df <- merge(RNA_values, SCT_values, by = "gene", suffixes = c("_RNA", "_SCT"))
ggplot(comparison_df, aes(x = avg_log2FC_RNA, y = avg_log2FC_SCT)) +
    geom_point() +
    geom_abline(slope = 1, intercept = 0, color = "red") +
    theme_minimal() +
    labs(title = "Comparison of DE Results: RNA vs SCT", x = "Log2FC (RNA)", y = "Log2FC (SCT)")

```
# 6. DE Analysis Visualize Results of RNA vs SCT comparison_p_val_adj
```{r , fig.height=8, fig.width=12}

library(ggplot2)

# Find common genes present in both DE results
common_genes <- intersect(DE_RNA$gene, DE_SCT$gene)

# Extract the values for the common genes
RNA_values <- DE_RNA[DE_RNA$gene %in% common_genes, ]
SCT_values <- DE_SCT[DE_SCT$gene %in% common_genes, ]

# Merge both datasets on "gene" column
comparison_df <- merge(RNA_values, SCT_values, by = "gene", suffixes = c("_RNA", "_SCT"))

# Plot adjusted p-values (log-transformed for better visualization)
ggplot(comparison_df, aes(x = -log10(p_val_adj_RNA), y = -log10(p_val_adj_SCT))) +
    geom_point(alpha = 0.5) +  # Transparency for better visibility
    geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed") +  # Diagonal reference line
    theme_minimal() +
    labs(
        title = "Comparison of Adjusted p-values: RNA vs SCT",
        x = "-log10 Adjusted p-value (RNA)",
        y = "-log10 Adjusted p-value (SCT)"
    ) +
    theme(
        plot.title = element_text(hjust = 0.5),  # Center the title
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14)
    )


```


**Why Use MAST?
Handles zero inflation (dropouts) in single-cell RNA-seq data.
Accounts for technical and biological variation.
Works well when adjusting for covariates (Patient_origin & cell_line).**


# 7. DE Analysis Using RNA Assay (Log-Normalized Counts) + MAST
```{r , fig.height=8, fig.width=12}

# # Load required package for MAST
# library(MAST)
# 
# # Set the default assay to RNA
# DefaultAssay(All_samples_Merged) <- "RNA"
# 
# # Perform LogNormalization on the RNA assay
# All_samples_Merged <- NormalizeData(All_samples_Merged, normalization.method = "LogNormalize", scale.factor = 10000)                                                                                                          
DefaultAssay(All_samples_Merged) <- "RNA"

# Set identity class to cell line
Idents(All_samples_Merged) <- "cell_line"

# Define malignant and normal cell lines
malignant_cell_line_L1 <- c("L1")  
normal_CD4Tcells <- c("PBMC", "PBMC_10x")  

# Perform DE analysis using RNA assay with MAST and covariates
DE_RNA_MAST <- FindMarkers(
  object = All_samples_Merged, 
  ident.1 = malignant_cell_line_L1,
  ident.2 = normal_CD4Tcells,
  assay = "RNA",   # Use RNA-normalized counts
  test.use = "MAST",  # Use MAST test
  min.pct = 0,
  logfc.threshold = 0,
  latent.vars = c("Patient_origin", "cell_line")  # Adjust for patient and cell line
)

# Convert to data frame and add gene names
DE_RNA_MAST <- as.data.frame(DE_RNA_MAST)
DE_RNA_MAST$gene <- rownames(DE_RNA_MAST)

# Reorder columns
DE_RNA_MAST <- DE_RNA_MAST[, c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")]

# Save results
write.csv(DE_RNA_MAST, "Malignant_CD4Tcells_L1_vs_Normal_CD4Tcells_RNA_MAST.csv", row.names = FALSE)



```


# 8. DE Analysis Using SCT Assay (SCTransformed Counts) + MAST
```{r , fig.height=8, fig.width=12}

# Load required package for MAST
library(MAST)

# Set the default assay to SCT
DefaultAssay(All_samples_Merged) <- "SCT"

# Set identity class to cell line
Idents(All_samples_Merged) <- "cell_line"

# Define malignant and normal cell lines
malignant_cell_line_L1 <- c("L1")  
normal_CD4Tcells <- c("PBMC", "PBMC_10x")  

# Perform DE analysis using SCT assay with MAST and covariates
DE_SCT_MAST <- FindMarkers(
  object = All_samples_Merged, 
  ident.1 = malignant_cell_line_L1,
  ident.2 = normal_CD4Tcells,
  assay = "SCT",   # Use SCT-normalized counts
  test.use = "MAST",  # Use MAST test
  min.pct = 0,
  logfc.threshold = 0,
  latent.vars = c("Patient_origin", "cell_line")  # Adjust for patient and cell line
)

# Convert to data frame and add gene names
DE_SCT_MAST <- as.data.frame(DE_SCT_MAST)
DE_SCT_MAST$gene <- rownames(DE_SCT_MAST)

# Reorder columns
DE_SCT_MAST <- DE_SCT_MAST[, c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")]

# Save results
write.csv(DE_SCT_MAST, "Malignant_CD4Tcells_L1_vs_Normal_CD4Tcells_SCT_MAST.csv", row.names = FALSE)


```


# 9. DE Analysis Visualize Results of RNA vs SCT comparison_avg_log2FC_RNA
```{r , fig.height=8, fig.width=12}

# Load required libraries
library(ggplot2)
library(ggrepel)  # For better labeling of outlier genes

# Load DE results
DE_RNA_MAST <- read.csv("Malignant_CD4Tcells_L1_vs_Normal_CD4Tcells_RNA_MAST.csv")
DE_SCT_MAST <- read.csv("Malignant_CD4Tcells_L1_vs_Normal_CD4Tcells_SCT_MAST.csv")

# Find common genes in both datasets
common_genes <- intersect(DE_RNA_MAST$gene, DE_SCT_MAST$gene)

# Filter for common genes
RNA_values <- DE_RNA_MAST[DE_RNA_MAST$gene %in% common_genes, ]
SCT_values <- DE_SCT_MAST[DE_SCT_MAST$gene %in% common_genes, ]

# Merge datasets
comparison_df <- merge(RNA_values, SCT_values, by = "gene", suffixes = c("_RNA", "_SCT"))

# ---- PLOT 1: Log2 Fold Change (Log2FC) Comparison ----
p1 <- ggplot(comparison_df, aes(x = avg_log2FC_RNA, y = avg_log2FC_SCT)) +
    geom_point(alpha = 0.5) +
    geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed") +  # Diagonal reference line
    theme_minimal() +
    labs(title = "Log2 Fold Change (Log2FC) Comparison: RNA vs SCT (MAST)",
         x = "Log2 Fold Change (RNA)",
         y = "Log2 Fold Change (SCT)") +
    theme(plot.title = element_text(hjust = 0.5))

# ---- PLOT 2: Adjusted p-value (p_val_adj) Comparison ----
p2 <- ggplot(comparison_df, aes(x = -log10(p_val_adj_RNA), y = -log10(p_val_adj_SCT))) +
    geom_point(alpha = 0.5) +
    geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed") +  # Reference line
    theme_minimal() +
    labs(title = "Adjusted p-value (-log10) Comparison: RNA vs SCT (MAST)",
         x = "-log10 Adjusted p-value (RNA)",
         y = "-log10 Adjusted p-value (SCT)") +
    theme(plot.title = element_text(hjust = 0.5))

# ---- PLOT BOTH ----
library(gridExtra)
grid.arrange(p1, p2, ncol = 2)


```


