1. Load Libraries

2. Load Data

2.1 Load CSV Files with Mean Expression


# Load the CSV files with mean expression data
markers_mast_SCT <- read.csv("../0-robj/1-MAST_with_SCT_batch_patient_cellline_as_Covariate_with_meanExpression.csv", row.names = 1)

markers_LR_SCT <- read.csv("../0-robj/1-LR_with_SCT_batch_patient_cellline_as_Covariate_with_meanExpression.csv", row.names = 1)

markers_negbinom_SCT <- read.csv("../0-robj/1-negbinom_with_SCT_batch_patient_cellline_as_Covariate_with_meanExpression.csv", row.names = 1)

3. Summarize Results

3.1 Summary Function

summarize_markers <- function(markers) {
  num_pval0 <- sum(markers$p_val_adj == 0)
  num_pval1 <- sum(markers$p_val_adj == 1)
  num_significant <- sum(markers$p_val_adj < 0.05)
  num_upregulated <- sum(markers$avg_log2FC > 1)
  num_downregulated <- sum(markers$avg_log2FC < -1)
  
  cat("Number of genes with p_val_adj = 0:", num_pval0, "\n")
  cat("Number of genes with p_val_adj = 1:", num_pval1, "\n")
  cat("Number of significant genes (p_val_adj < 0.05):", num_significant, "\n")
  cat("Number of upregulated genes (avg_log2FC > 1):", num_upregulated, "\n")
  cat("Number of downregulated genes (avg_log2FC < 1):", num_downregulated, "\n")
}

3.2 Summarize Markers1 (MAST with SCT Assay)

cat("Markers1 Summary (MAST with Batch Correction):\n")
Markers1 Summary (MAST with Batch Correction):
summarize_markers(markers_mast_SCT)
Number of genes with p_val_adj = 0: 72 
Number of genes with p_val_adj = 1: 8299 
Number of significant genes (p_val_adj < 0.05): 5084 
Number of upregulated genes (avg_log2FC > 1): 9917 
Number of downregulated genes (avg_log2FC < 1): 789 

3.3 Summary for Marker2 (LR with SCT Assay)

cat("Markers3 Summary (MAST without Batch Correction):\n")
Markers3 Summary (MAST without Batch Correction):
summarize_markers(markers_LR_SCT)
Number of genes with p_val_adj = 0: 1 
Number of genes with p_val_adj = 1: 25321 
Number of significant genes (p_val_adj < 0.05): 587 
Number of upregulated genes (avg_log2FC > 1): 15048 
Number of downregulated genes (avg_log2FC < 1): 2281 

3.4 Summary for Markers3 (negbinom with RNA Assay)

cat("Markers3 Summary (MAST without Batch Correction):\n")
Markers3 Summary (MAST without Batch Correction):
summarize_markers(markers_negbinom_SCT)
Number of genes with p_val_adj = 0: 60 
Number of genes with p_val_adj = 1: 21805 
Number of significant genes (p_val_adj < 0.05): 3653 
Number of upregulated genes (avg_log2FC > 1): 15047 
Number of downregulated genes (avg_log2FC < 1): 2262 

4. Volcano Plots for All Genes

4.1 Volcano Plot for MAST with SCT Assay

EnhancedVolcano(markers_mast_SCT,
                lab = markers_mast_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "MAST with SCT assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 0.5)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

4.2 Volcano Plot for MAST with SCT Assay

EnhancedVolcano(markers_mast_SCT, 
                lab = markers_mast_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3','CD74','IL4','IL13', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.0001,
                FCcutoff = 0.5, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

4.3 Volcano Plot for LR with SCT Assay

EnhancedVolcano(markers_LR_SCT,
                lab = markers_LR_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "LR with SCT Assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 0.5)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

4.4 Volcano Plot for markers_LR_SCT

EnhancedVolcano(markers_LR_SCT, 
                lab = markers_LR_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3','CD74','IL4','IL13', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.05,
                FCcutoff = 0.5, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

4.5 Volcano Plot for negbinom with SCT Assay

EnhancedVolcano(markers_negbinom_SCT,
                lab = markers_negbinom_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "LR with SCT Assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 0.5)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

4.6 Volcano Plot for markers_negbinom_SCT

EnhancedVolcano(markers_negbinom_SCT, 
                lab = markers_negbinom_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3','CD74','IL4','IL13', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.05,
                FCcutoff = 0.5, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

5. Filter and Summarize Results

5.1 Apply Expression Filter First

# Apply the expression filter first
markers_mast_SCT <- markers_mast_SCT %>%
  filter(!(mean_expr_group1 < 0.2 & mean_expr_group2 < 0.2))

markers_LR_SCT <- markers_LR_SCT %>%
  filter(!(mean_expr_group1 < 0.2 & mean_expr_group2 < 0.2))

markers_negbinom_SCT <- markers_negbinom_SCT %>%
  filter(!(mean_expr_group1 < 0.2 & mean_expr_group2 < 0.2))

5.2 Summary Function

summarize_markers <- function(markers) {
  num_pval0 <- sum(markers$p_val_adj == 0)
  num_pval1 <- sum(markers$p_val_adj == 1)
  num_significant <- sum(markers$p_val_adj < 0.05)
  num_upregulated <- sum(markers$avg_log2FC > 1)
  num_downregulated <- sum(markers$avg_log2FC < -1)
  
  cat("Number of genes with p_val_adj = 0:", num_pval0, "\n")
  cat("Number of genes with p_val_adj = 1:", num_pval1, "\n")
  cat("Number of significant genes (p_val_adj < 0.05):", num_significant, "\n")
  cat("Number of upregulated genes (avg_log2FC > 1):", num_upregulated, "\n")
  cat("Number of downregulated genes (avg_log2FC < 1):", num_downregulated, "\n")
}

5.3 Summarize Markers1 (MAST with SCT Assay)

cat("Markers1 Summary (MAST with Batch Correction):\n")
Markers1 Summary (MAST with Batch Correction):
summarize_markers(markers_mast_SCT)
Number of genes with p_val_adj = 0: 74 
Number of genes with p_val_adj = 1: 2209 
Number of significant genes (p_val_adj < 0.05): 3042 
Number of upregulated genes (avg_log2FC > 1): 4514 
Number of downregulated genes (avg_log2FC < 1): 125 

5.4 Summary for Markers3 (LR with SCT Assay)

cat("Markers3 Summary (LR with Batch Correction):\n")
Markers3 Summary (LR with Batch Correction):
summarize_markers(markers_LR_SCT)
Number of genes with p_val_adj = 0: 0 
Number of genes with p_val_adj = 1: 5214 
Number of significant genes (p_val_adj < 0.05): 358 
Number of upregulated genes (avg_log2FC > 1): 4514 
Number of downregulated genes (avg_log2FC < 1): 125 

5.5 Summary for Markers3 (negbinom with SCT Assay)

cat("Markers3 Summary (negbinom with Batch Correction):\n")
Markers3 Summary (negbinom with Batch Correction):
summarize_markers(markers_negbinom_SCT)
Number of genes with p_val_adj = 0: 59 
Number of genes with p_val_adj = 1: 3075 
Number of significant genes (p_val_adj < 0.05): 2340 
Number of upregulated genes (avg_log2FC > 1): 4514 
Number of downregulated genes (avg_log2FC < 1): 125 

5.6 Apply Additional Filters for Statistical Significance and Fold Change

# Define filtering criteria
p_val_threshold <- 0.05
logfc_threshold <- 1

# Apply additional filters for markers_mast_batch
filtered_markers_mast_SCT <- markers_mast_SCT %>%
  filter(p_val_adj < p_val_threshold & abs(avg_log2FC) > logfc_threshold)


filtered_markers_LR_SCT <- markers_LR_SCT %>%
  filter(p_val_adj < p_val_threshold & abs(avg_log2FC) > logfc_threshold)

filtered_markers_negbinom_SCT <- markers_negbinom_SCT %>%
  filter(p_val_adj < p_val_threshold & abs(avg_log2FC) > logfc_threshold)


# Save filtered results
  write.csv(filtered_markers_mast_SCT, file = "../0-robj/SCT_MAST_LR_negbinom/1-MAST_with_SCT_batch_cellline_as_Covariate_with_meanExpression_filtered.csv", row.names = TRUE)
write.csv(filtered_markers_LR_SCT, file = "../0-robj/SCT_MAST_LR_negbinom/1-LR_with_SCT_batch_cellline_as_Covariate_with_meanExpression_filtered.csv", row.names = TRUE)

write.csv(filtered_markers_negbinom_SCT, file = "../0-robj/SCT_MAST_LR_negbinom/1-negbinom_with_SCT_batch_cellline_as_Covariate_with_meanExpression_filtered.csv", row.names = TRUE)

5.7 Summary Function

summarize_markers <- function(markers) {
  num_pval0 <- sum(markers$p_val_adj == 0)
  num_pval1 <- sum(markers$p_val_adj == 1)
  num_significant <- sum(markers$p_val_adj < 0.05)
  num_upregulated <- sum(markers$avg_log2FC > 1)
  num_downregulated <- sum(markers$avg_log2FC < -1)
  
  cat("Number of genes with p_val_adj = 0:", num_pval0, "\n")
  cat("Number of genes with p_val_adj = 1:", num_pval1, "\n")
  cat("Number of significant genes (p_val_adj < 0.05):", num_significant, "\n")
  cat("Number of upregulated genes (avg_log2FC > 1):", num_upregulated, "\n")
  cat("Number of downregulated genes (avg_log2FC < -1):", num_downregulated, "\n")
}

5.8 Summarize Markers1 (MAST with Batch Correction)

cat("Markers1 Summary (markers_mast_SCT):\n")
Markers1 Summary (markers_mast_SCT):
summarize_markers(filtered_markers_mast_SCT)
Number of genes with p_val_adj = 0: 35 
Number of genes with p_val_adj = 1: 0 
Number of significant genes (p_val_adj < 0.05): 2077 
Number of upregulated genes (avg_log2FC > 1): 1956 
Number of downregulated genes (avg_log2FC < -1): 121 

5.9 Summarize Markers2 (markers_LR_SCT)

cat("Markers2 Summary (markers_LR_SCT):\n")
Markers2 Summary (markers_LR_SCT):
summarize_markers(filtered_markers_LR_SCT)
Number of genes with p_val_adj = 0: 0 
Number of genes with p_val_adj = 1: 0 
Number of significant genes (p_val_adj < 0.05): 235 
Number of upregulated genes (avg_log2FC > 1): 198 
Number of downregulated genes (avg_log2FC < -1): 37 

5.10 Summarize Markers2 (markers_negbinom_SCT)

cat("Markers2 Summary (markers_negbinom_SCT):\n")
Markers2 Summary (markers_negbinom_SCT):
summarize_markers(filtered_markers_negbinom_SCT)
Number of genes with p_val_adj = 0: 23 
Number of genes with p_val_adj = 1: 0 
Number of significant genes (p_val_adj < 0.05): 1708 
Number of upregulated genes (avg_log2FC > 1): 1619 
Number of downregulated genes (avg_log2FC < -1): 89 

6. Visualization of Filtered Results

6.1 Volcano Plot for markers_mast_SCT

EnhancedVolcano(filtered_markers_mast_SCT,
                lab = filtered_markers_mast_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "MAST with SCT assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 1)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

6.2 Volcano Plot for MAST with SCT Assay

EnhancedVolcano(filtered_markers_mast_SCT, 
                lab = filtered_markers_mast_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.0001,
                FCcutoff = 1, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

6.3 Volcano Plot for LR with SCT Assay

EnhancedVolcano(filtered_markers_LR_SCT,
                lab = filtered_markers_LR_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "LR with SCT Assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 1.0)

6.4 Volcano Plot for LR with SCT Assay

EnhancedVolcano(filtered_markers_LR_SCT, 
                lab = filtered_markers_LR_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.05,
                FCcutoff = 1, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)

6.5 Volcano Plot for negbinom with SCT Assay

EnhancedVolcano(filtered_markers_negbinom_SCT,
                lab = filtered_markers_negbinom_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "LR with SCT Assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 1.0)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

6.6 Volcano Plot for negbinom with SCT Assay

EnhancedVolcano(filtered_markers_negbinom_SCT, 
                lab = filtered_markers_negbinom_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.05,
                FCcutoff = 1, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)
Avis : One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...

---
title: "Filtering and Visualization(MAST, LR and negbinom on SCT)"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  html_notebook:
    toc: true
    toc_float: true
    toc_collapsed: true
---

# 1. Load Libraries
```{r setup, include=FALSE}
library(dplyr)
library(ggplot2)
library(EnhancedVolcano)
library(pheatmap)
library(tibble)
```

# 2. Load Data
## 2.1 Load CSV Files with Mean Expression
```{r}

# Load the CSV files with mean expression data
markers_mast_SCT <- read.csv("../0-robj/1-MAST_with_SCT_batch_patient_cellline_as_Covariate_with_meanExpression.csv", row.names = 1)

markers_LR_SCT <- read.csv("../0-robj/1-LR_with_SCT_batch_patient_cellline_as_Covariate_with_meanExpression.csv", row.names = 1)

markers_negbinom_SCT <- read.csv("../0-robj/1-negbinom_with_SCT_batch_patient_cellline_as_Covariate_with_meanExpression.csv", row.names = 1)

```

# 3. Summarize Results
## 3.1 Summary Function
```{r}
summarize_markers <- function(markers) {
  num_pval0 <- sum(markers$p_val_adj == 0)
  num_pval1 <- sum(markers$p_val_adj == 1)
  num_significant <- sum(markers$p_val_adj < 0.05)
  num_upregulated <- sum(markers$avg_log2FC > 1)
  num_downregulated <- sum(markers$avg_log2FC < -1)
  
  cat("Number of genes with p_val_adj = 0:", num_pval0, "\n")
  cat("Number of genes with p_val_adj = 1:", num_pval1, "\n")
  cat("Number of significant genes (p_val_adj < 0.05):", num_significant, "\n")
  cat("Number of upregulated genes (avg_log2FC > 1):", num_upregulated, "\n")
  cat("Number of downregulated genes (avg_log2FC < 1):", num_downregulated, "\n")
}

```

## 3.2 Summarize Markers1 (MAST with SCT Assay)
```{r}
cat("Markers1 Summary (MAST with Batch Correction):\n")
summarize_markers(markers_mast_SCT)

```



## 3.3 Summary for Marker2 (LR with SCT Assay)
```{r}
cat("Markers3 Summary (MAST without Batch Correction):\n")
summarize_markers(markers_LR_SCT)
```
## 3.4 Summary for Markers3 (negbinom with RNA Assay)
```{r}
cat("Markers3 Summary (MAST without Batch Correction):\n")
summarize_markers(markers_negbinom_SCT)
```


# 4. Volcano Plots for All Genes
## 4.1 Volcano Plot for MAST with SCT Assay
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(markers_mast_SCT,
                lab = markers_mast_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "MAST with SCT assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 0.5)
```

## 4.2 Volcano Plot for MAST with SCT Assay
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(markers_mast_SCT, 
                lab = markers_mast_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3','CD74','IL4','IL13', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.0001,
                FCcutoff = 0.5, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)

```

## 4.3 Volcano Plot for LR with SCT Assay
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(markers_LR_SCT,
                lab = markers_LR_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "LR with SCT Assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 0.5)
```


## 4.4 Volcano Plot for markers_LR_SCT
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(markers_LR_SCT, 
                lab = markers_LR_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3','CD74','IL4','IL13', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.05,
                FCcutoff = 0.5, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)

```
## 4.5 Volcano Plot for negbinom with SCT Assay
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(markers_negbinom_SCT,
                lab = markers_negbinom_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "LR with SCT Assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 0.5)
```

## 4.6 Volcano Plot for markers_negbinom_SCT
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(markers_negbinom_SCT, 
                lab = markers_negbinom_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3','CD74','IL4','IL13', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.05,
                FCcutoff = 0.5, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)

```

# 5. Filter and Summarize Results
## 5.1 Apply Expression Filter First
```{r}
# Apply the expression filter first
markers_mast_SCT <- markers_mast_SCT %>%
  filter(!(mean_expr_group1 < 0.2 & mean_expr_group2 < 0.2))

markers_LR_SCT <- markers_LR_SCT %>%
  filter(!(mean_expr_group1 < 0.2 & mean_expr_group2 < 0.2))

markers_negbinom_SCT <- markers_negbinom_SCT %>%
  filter(!(mean_expr_group1 < 0.2 & mean_expr_group2 < 0.2))

```


## 5.2 Summary Function
```{r}
summarize_markers <- function(markers) {
  num_pval0 <- sum(markers$p_val_adj == 0)
  num_pval1 <- sum(markers$p_val_adj == 1)
  num_significant <- sum(markers$p_val_adj < 0.05)
  num_upregulated <- sum(markers$avg_log2FC > 1)
  num_downregulated <- sum(markers$avg_log2FC < -1)
  
  cat("Number of genes with p_val_adj = 0:", num_pval0, "\n")
  cat("Number of genes with p_val_adj = 1:", num_pval1, "\n")
  cat("Number of significant genes (p_val_adj < 0.05):", num_significant, "\n")
  cat("Number of upregulated genes (avg_log2FC > 1):", num_upregulated, "\n")
  cat("Number of downregulated genes (avg_log2FC < 1):", num_downregulated, "\n")
}

```

## 5.3 Summarize Markers1 (MAST with SCT Assay)
```{r}
cat("Markers1 Summary (MAST with Batch Correction):\n")
summarize_markers(markers_mast_SCT)

```



## 5.4 Summary for Markers3 (LR with SCT Assay)
```{r}
cat("Markers3 Summary (LR with Batch Correction):\n")
summarize_markers(markers_LR_SCT)
```

## 5.5 Summary for Markers3 (negbinom with SCT Assay)
```{r}
cat("Markers3 Summary (negbinom with Batch Correction):\n")
summarize_markers(markers_negbinom_SCT)
```

## 5.6 Apply Additional Filters for Statistical Significance and Fold Change
```{r}
# Define filtering criteria
p_val_threshold <- 0.05
logfc_threshold <- 1

# Apply additional filters for markers_mast_batch
filtered_markers_mast_SCT <- markers_mast_SCT %>%
  filter(p_val_adj < p_val_threshold & abs(avg_log2FC) > logfc_threshold)


filtered_markers_LR_SCT <- markers_LR_SCT %>%
  filter(p_val_adj < p_val_threshold & abs(avg_log2FC) > logfc_threshold)

filtered_markers_negbinom_SCT <- markers_negbinom_SCT %>%
  filter(p_val_adj < p_val_threshold & abs(avg_log2FC) > logfc_threshold)


# Save filtered results
  write.csv(filtered_markers_mast_SCT, file = "../0-robj/SCT_MAST_LR_negbinom/1-MAST_with_SCT_batch_cellline_as_Covariate_with_meanExpression_filtered.csv", row.names = TRUE)
write.csv(filtered_markers_LR_SCT, file = "../0-robj/SCT_MAST_LR_negbinom/1-LR_with_SCT_batch_cellline_as_Covariate_with_meanExpression_filtered.csv", row.names = TRUE)

write.csv(filtered_markers_negbinom_SCT, file = "../0-robj/SCT_MAST_LR_negbinom/1-negbinom_with_SCT_batch_cellline_as_Covariate_with_meanExpression_filtered.csv", row.names = TRUE)

```

## 5.7 Summary Function
```{r}
summarize_markers <- function(markers) {
  num_pval0 <- sum(markers$p_val_adj == 0)
  num_pval1 <- sum(markers$p_val_adj == 1)
  num_significant <- sum(markers$p_val_adj < 0.05)
  num_upregulated <- sum(markers$avg_log2FC > 1)
  num_downregulated <- sum(markers$avg_log2FC < -1)
  
  cat("Number of genes with p_val_adj = 0:", num_pval0, "\n")
  cat("Number of genes with p_val_adj = 1:", num_pval1, "\n")
  cat("Number of significant genes (p_val_adj < 0.05):", num_significant, "\n")
  cat("Number of upregulated genes (avg_log2FC > 1):", num_upregulated, "\n")
  cat("Number of downregulated genes (avg_log2FC < -1):", num_downregulated, "\n")
}
```

## 5.8 Summarize Markers1 (MAST with Batch Correction)
```{r}
cat("Markers1 Summary (markers_mast_SCT):\n")
summarize_markers(filtered_markers_mast_SCT)
```

## 5.9 Summarize Markers2 (markers_LR_SCT)
```{r}
cat("Markers2 Summary (markers_LR_SCT):\n")
summarize_markers(filtered_markers_LR_SCT)
```

## 5.10 Summarize Markers2 (markers_negbinom_SCT)
```{r}
cat("Markers2 Summary (markers_negbinom_SCT):\n")
summarize_markers(filtered_markers_negbinom_SCT)
```

# 6. Visualization of Filtered Results
## 6.1 Volcano Plot for markers_mast_SCT
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(filtered_markers_mast_SCT,
                lab = filtered_markers_mast_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "MAST with SCT assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 1)
```


## 6.2 Volcano Plot for MAST with SCT Assay
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(filtered_markers_mast_SCT, 
                lab = filtered_markers_mast_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.0001,
                FCcutoff = 1, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)

```

## 6.3 Volcano Plot for LR with SCT Assay
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(filtered_markers_LR_SCT,
                lab = filtered_markers_LR_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "LR with SCT Assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 1.0)
```


## 6.4 Volcano Plot for LR with SCT Assay
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(filtered_markers_LR_SCT, 
                lab = filtered_markers_LR_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.05,
                FCcutoff = 1, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)

```

## 6.5 Volcano Plot for negbinom with SCT Assay
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(filtered_markers_negbinom_SCT,
                lab = filtered_markers_negbinom_SCT$gene,
                x = "avg_log2FC",
                y = "p_val_adj",
                title = "LR with SCT Assay (All Genes)",
                pCutoff = 0.05,
                FCcutoff = 1.0)
```


## 6.6 Volcano Plot for negbinom with SCT Assay
```{r, fig.height=12, fig.width=12}
EnhancedVolcano(filtered_markers_negbinom_SCT, 
                lab = filtered_markers_negbinom_SCT$gene,
                x = "avg_log2FC", 
                y = "p_val_adj",
                selectLab = c('KIR3DL2', 'TOX', 'TWIST1', 'CD52', 'CCR4', 'IL2RA', 'CD7', 'DPP4', 'CCR7', 'GATA3', 'FOXP3', 'DNM3', 'NCR1', 'CD70', 'AIRE', 'PDCD1 (PD-1)', 'CD274 (PD-L1)'),
                title = "Sézary CD4 T cells vs Normal CD4 T cells",
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.05,
                FCcutoff = 1, 
                pointSize = 3.0,
                labSize = 3.0,
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legendPosition = 'right',
                legendLabSize = 10,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black',
                arrowheads = FALSE,
                max.overlaps = 30)

```