This analysis validates CLIC1 as a biomarker in Sézary Syndrome using publicly available single-cell RNA-seq data from Herrera et al. (2021). The dataset includes:
Reference: Herrera et al. (2021) - PMC8532199
# Base directory
base_dir <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Final_Year_Results/2025-Year3_Analysis/Biomarkers_Validation_with_Public_Data/Herrera_Data/"
# Seurat object
seurat_file <- file.path(base_dir, "Herrara_All_samples.rds")
# TCR data directory
tcr_dir <- file.path(base_dir, "Herrara_TCR_AB")
# Output directory
output_dir <- "output"
dir.create(output_dir, showWarnings = FALSE)
cat("Loading Seurat object...\n")
Loading Seurat object...
merged_seurat <- readRDS(seurat_file)
cat("Dataset dimensions:\n")
Dataset dimensions:
cat(sprintf(" - Cells: %d\n", ncol(merged_seurat)))
- Cells: 30345
cat(sprintf(" - Genes: %d\n", nrow(merged_seurat)))
- Genes: 33660
cat(sprintf(" - Samples: %s\n", paste(unique(merged_seurat$sample_id), collapse = ", ")))
- Samples: SS1_Blood, SS2_Blood, SS3_Blood, SS4_Blood, SS5_Blood, SS6_Blood, MF1_Blood, SS1_Skin, SS2_Skin, SS3_Skin, SS4_Skin, MF1_Skin, Healthy_Skin, Healthy_Blood
tcr_list <- lapply(names(tcr_files), function(sample_name) {
cat("Reading:", sample_name, "\n")
df <- read.delim(gzfile(tcr_files[[sample_name]]), check.names = FALSE)
colnames(df)[1] <- "clonotype"
df_long <- df %>%
pivot_longer(
cols = -clonotype,
names_to = "cell_barcode",
values_to = "count"
) %>%
filter(count > 0) %>%
mutate(sample_id = sample_name)
return(df_long)
})
Reading: HC1_Blood
Reading: HC1_Skin
Reading: SS1_Blood
Reading: SS1_Skin
Reading: SS2_Blood
Reading: SS2_Skin
Reading: SS3_Blood
Reading: SS3_Skin
Reading: SS4_Blood
Reading: SS4_Skin
Reading: MFIV1_Blood
Reading: MFIV1_Skin
Reading: SS5_Blood
Reading: SS6_Blood
combined_tcr <- bind_rows(tcr_list)
cat("Total productive TCR assignments:", nrow(combined_tcr), "\n")
Total productive TCR assignments: 17899
library(stringr)
# Extract alpha and beta chains from clonotype
combined_tcr <- combined_tcr %>%
mutate(
TCR_alpha = str_extract(clonotype, "(?<=TRA:)[^|]+"),
TCR_beta = str_extract(clonotype, "(?<=TRB:)[^|]+")
)
# Make cell_barcode match Seurat object
combined_tcr <- combined_tcr %>%
mutate(cell_barcode = paste0(sample_id, "_", cell_barcode))
# Keep only cells present in Seurat
tcr_in_seurat <- combined_tcr[combined_tcr$cell_barcode %in% colnames(merged_seurat), ]
# Create a metadata table for Seurat
tcr_meta <- tcr_in_seurat %>%
select(cell_barcode, clonotype, TCR_alpha, TCR_beta) %>%
column_to_rownames("cell_barcode")
merged_seurat <- AddMetaData(merged_seurat, metadata = tcr_meta)
# =========================
# Identify expanded malignant T cells per SS sample using scRepertoire
# =========================
ss_samples <- c("SS1_Blood", "SS1_Skin", "SS2_Blood", "SS2_Skin",
"SS3_Blood", "SS3_Skin", "SS4_Blood", "SS4_Skin",
"SS5_Blood", "SS6_Blood")
malignant_cells_list <- list()
for (s in ss_samples) {
sample_cells <- subset(merged_seurat, subset = sample_id == s)
# Skip sample if no TCR_beta
if (!"TCR_beta" %in% colnames(sample_cells@meta.data)) next
clonotype_counts <- table(sample_cells$TCR_beta)
expanded_clones <- names(clonotype_counts[clonotype_counts >= 5])
# Skip if no expanded clones
if (length(expanded_clones) == 0) next
malignant <- subset(sample_cells, subset = TCR_beta %in% expanded_clones)
# Only add if there are cells
if (ncol(malignant) > 0) malignant_cells_list[[s]] <- malignant
}
# =========================
# Merge all malignant SS cells (robust)
# =========================
malignant_cells_list <- malignant_cells_list[!sapply(malignant_cells_list, is.null)] # remove NULLs
malignant_cells_list <- unname(malignant_cells_list) # remove names
sezary_cells <- Reduce(function(x, y) merge(x, y), malignant_cells_list) # merge all
library(scRepertoire)
library(Seurat)
# ================================
# Clonal homeostasis (size distribution)
# ================================
clonalHomeostasis(sezary_cells, cloneCall = "clonotype")
# ================================
# Clonal proportion / relative abundance
# ================================
clonalProportion(sezary_cells, cloneCall = "clonotype")
NA
NA
library(scRepertoire)
clonalOverlap(sezary_cells, cloneCall = "clonotype", group.by = "sample_id", method = "morisita")
NA
NA
# ================================
# Merge all Sézary malignant cells with healthy controls
# ================================
# Ensure RNA assay
DefaultAssay(sezary_cells) <- "RNA"
# Normalize and identify variable features
sezary_cells <- NormalizeData(sezary_cells, normalization.method = "LogNormalize", scale.factor = 10000)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
sezary_cells <- FindVariableFeatures(sezary_cells, selection.method = "vst", nfeatures = 2000)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
sezary_cells <- ScaleData(sezary_cells)
|
| | 0%
|
|================================================ | 50%
|
|=================================================================================================| 100%
sezary_cells <- RunPCA(sezary_cells, npcs = 20)
# Subset healthy control cells
hc_cells <- subset(merged_seurat, subset = sample_id %in% c("Healthy_Blood", "Healthy_Skin"))
# Merge all together
ss_vs_hc <- merge(
x = sezary_cells,
y = hc_cells,
add.cell.ids = c("SS", "HC")
)
# ================================
# Normalize, find variable features, scale, and run PCA & UMAP on merged object
# ================================
DefaultAssay(ss_vs_hc) <- "RNA"
ss_vs_hc <- NormalizeData(ss_vs_hc, normalization.method = "LogNormalize", scale.factor = 10000)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
ss_vs_hc <- FindVariableFeatures(ss_vs_hc, selection.method = "vst", nfeatures = 2000)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
ss_vs_hc <- ScaleData(ss_vs_hc)
|
| | 0%
|
|================================================ | 50%
|
|=================================================================================================| 100%
ss_vs_hc <- RunPCA(ss_vs_hc, npcs = 20)
ss_vs_hc <- RunUMAP(ss_vs_hc, dims = 1:20)
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# ================================
# Add tissue grouping column
# ================================
ss_vs_hc$tissue_group <- case_when(
grepl("Blood", ss_vs_hc$sample_id, ignore.case = TRUE) ~ "Blood",
grepl("Skin", ss_vs_hc$sample_id, ignore.case = TRUE) ~ "Skin",
TRUE ~ "Other"
)
# ================================
# Visualize CLIC1 expression
# ================================
# FeaturePlot with red gradient
FeaturePlot(ss_vs_hc, features = "CLIC1", reduction = "umap",
cols = c("grey90", "red"), label = TRUE)
# DotPlot by sample_id with red-grey gradient
DotPlot(ss_vs_hc, features = "CLIC1", group.by = "sample_id") +
RotatedAxis() +
scale_color_gradient(low = "grey90", high = "red") +
ggtitle("CLIC1 Expression by Sample") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
# DotPlot by tissue_group with red-grey gradient
DotPlot(ss_vs_hc, features = "CLIC1", group.by = "tissue_group") +
RotatedAxis() +
scale_color_gradient(low = "grey90", high = "red") +
ggtitle("CLIC1 Expression by Tissue") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
# ================================
# Visualize CLIC1
# ================================
# FeaturePlot
FeaturePlot(ss_vs_hc, features = "CLIC1", reduction = "umap")
# DotPlot grouped by sample
DotPlot(ss_vs_hc, features = "CLIC1", group.by = "sample_id") + RotatedAxis()
# DotPlot grouped by tissue
DotPlot(ss_vs_hc, features = "CLIC1", group.by = "tissue_group") + RotatedAxis()
# DimPlot by sample
DimPlot(ss_vs_hc, group.by = "sample_id", reduction = "umap")
# FeaturePlot for CLIC1
FeaturePlot(ss_vs_hc, features = "CLIC1", reduction = "umap")
# DotPlot for CLIC1 across groups
DotPlot(ss_vs_hc, features = "CLIC1", group.by = "sample_id") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# FeaturePlot with red gradient
FeaturePlot(ss_vs_hc, features = "CLIC1", reduction = "umap",
cols = c("grey90", "red"), label = TRUE, repel = T)
top_50_up <- read.csv("top_50_upregulated.csv") # or read.delim("top_50_up.tsv")
top_50_down <- read.csv("top_50_downregulated.csv")
FeaturePlot(ss_vs_hc,
features = top_50_up$gene[1:10],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_vs_hc,
features = top_50_up$gene[11:20],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_vs_hc,
features = top_50_up$gene[21:30],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_vs_hc,
features = top_50_up$gene[31:40],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_vs_hc,
features = top_50_up$gene[41:50],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_vs_hc,
features = top_50_down$gene[1:10],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_vs_hc,
features = top_50_down$gene[11:20],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_vs_hc,
features = top_50_down$gene[21:30],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_vs_hc,
features = top_50_down$gene[31:40],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_vs_hc,
features = top_50_down$gene[41:50],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
)
Error: unexpected ')' in " )"
# Vector of genes to plot
up_genes <- c("CLIC1", "COX5A","GTSF1", "MAD2L1","MYBL2","MYL6B","NME1","PLK1", "PYCR1", "SLC25A5", "SRI", "TUBA1C", "UBE2T", "YWHAH")
# DotPlot with custom firebrick-red gradient
DotPlot(ss_vs_hc, features = up_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
# DotPlot with custom firebrick-red gradient
DotPlot(ss_vs_hc, features = up_genes, group.by = "sample_id") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
# DotPlot with custom firebrick-red gradient
DotPlot(ss_vs_hc, features = up_genes, group.by = "tissue") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
# DotPlot with custom firebrick-red gradient
DotPlot(ss_vs_hc, features = up_genes, group.by = "condition") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
# DotPlot with custom firebrick-red gradient
DotPlot(ss_vs_hc, features = up_genes, group.by = "condition") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14))
# Downregulated genes
down_genes <- c("TXNIP", "RASA3", "RIPOR2",
"ZFP36", "ZFP36L1", "ZFP36L2",
"PRMT2", "MAX", "PIK3IP1",
"BTG1", "CDKN1B")
# DotPlot with firebrick color for high expression
DotPlot(ss_vs_hc, features = down_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
# DotPlot with firebrick color for high expression
DotPlot(ss_vs_hc, features = down_genes, group.by = "sample_id") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
# DotPlot with firebrick color for high expression
DotPlot(ss_vs_hc, features = down_genes, group.by = "tissue") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
# DotPlot with firebrick color for high expression
DotPlot(ss_vs_hc, features = down_genes, group.by = "condition") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
# Load required libraries
library(Seurat)
library(dplyr)
library(tibble)
combined_seu <- ss_vs_hc
# Join the layers of the RNA assay
combined_seu <- JoinLayers(combined_seu, assay = "RNA")
# Ensure your identity class is set to disease status
Idents(combined_seu) <- "condition" # e.g., levels: "SS", "Control"
# Run differential expression between SS vs Control
markers_disease <- FindMarkers(
object = combined_seu,
ident.1 = "SS",
ident.2 = "Healthy",
assay = "RNA",
logfc.threshold = 0,
min.pct = 0,
test.use = "wilcox" # or "MAST" if RNA assay supports it
)
# Save results to CSV
write.csv(markers_disease, file = "DE_SS_vs_Healthy_Herrara2021.csv", row.names = TRUE)
# Get log-normalized expression matrix (RNA assay)
expression_data_RNA <- GetAssayData(combined_seu, assay = "RNA", slot = "data")
# Get cell names for each group
ss_cells <- WhichCells(combined_seu, idents = "SS")
healthy_cells <- WhichCells(combined_seu, idents = "Healthy")
# Function to add mean expression per group
calculate_mean_expression <- function(markers, group1_cells, group2_cells, expression_data) {
group1_mean <- rowMeans(expression_data[, group1_cells, drop = FALSE], na.rm = TRUE)
group2_mean <- rowMeans(expression_data[, group2_cells, drop = FALSE], na.rm = TRUE)
markers <- markers %>%
rownames_to_column("gene") %>%
mutate(
mean_expr_SS = group1_mean[gene],
mean_expr_Healthy = group2_mean[gene],
log2FC_manual = log2(mean_expr_SS + 1) - log2(mean_expr_Healthy + 1)
)
return(markers)
}
# Apply the function and save final result
markers_disease_with_mean <- calculate_mean_expression(markers_disease, ss_cells, healthy_cells, expression_data_RNA)
write.csv(markers_disease_with_mean, "DE_SS_vs_Healthy_with_MeanExpr_Herrera2021.csv", row.names = FALSE)
# Load required libraries
library(Seurat)
library(dplyr)
library(tibble)
combined_seu <- merged_seurat
# Join the layers of the RNA assay
combined_seu <- JoinLayers(combined_seu, assay = "RNA")
# Ensure your identity class is set to disease status
Idents(combined_seu) <- "condition" # e.g., levels: "SS", "Control"
# Run differential expression between SS vs Control
markers_disease <- FindMarkers(
object = combined_seu,
ident.1 = "SS",
ident.2 = "Healthy",
assay = "RNA",
logfc.threshold = 0,
min.pct = 0,
test.use = "wilcox"
)
# Save results to CSV
write.csv(markers_disease, file = "MergedSeurat_DE_SS_vs_Healthy_Herrara2021.csv", row.names = TRUE)
# Get log-normalized expression matrix (RNA assay)
expression_data_RNA <- GetAssayData(combined_seu, assay = "RNA", slot = "data")
# Get cell names for each group
ss_cells <- WhichCells(combined_seu, idents = "SS")
healthy_cells <- WhichCells(combined_seu, idents = "Healthy")
# Function to add mean expression per group
calculate_mean_expression <- function(markers, group1_cells, group2_cells, expression_data) {
group1_mean <- rowMeans(expression_data[, group1_cells, drop = FALSE], na.rm = TRUE)
group2_mean <- rowMeans(expression_data[, group2_cells, drop = FALSE], na.rm = TRUE)
markers <- markers %>%
rownames_to_column("gene") %>%
mutate(
mean_expr_SS = group1_mean[gene],
mean_expr_Healthy = group2_mean[gene],
log2FC_manual = log2(mean_expr_SS + 1) - log2(mean_expr_Healthy + 1)
)
return(markers)
}
# Apply the function and save final result
markers_disease_with_mean <- calculate_mean_expression(markers_disease, ss_cells, healthy_cells, expression_data_RNA)
write.csv(markers_disease_with_mean, "MergedSeurat_DE_SS_vs_Healthy_with_MeanExpr_Herrera2021.csv", row.names = FALSE)
#saveRDS(ss_vs_hc, file = "Sezary_Blood_Skin_vs_HC_TCR_filtered.rds")