table(merged_seurat$sample_id)
Healthy_Blood Healthy_Skin MF1_Blood MF1_Skin SS1_Blood SS1_Skin SS2_Blood
4503 50 5027 698 4042 1644 6814
SS2_Skin SS3_Blood SS3_Skin SS4_Blood SS4_Skin SS5_Blood SS6_Blood
834 2424 1072 2326 1556 3590 3239
table(merged_seurat$condition)
Healthy MF SS
4553 5725 27541
table(merged_seurat$tissue)
Blood Skin
31965 5854
# Mitochondrial content
merged_seurat[["percent.mt"]] <- PercentageFeatureSet(merged_seurat, pattern = "^MT-")
# Basic filtering
merged_seurat <- subset(merged_seurat, subset = nFeature_RNA > 200 & nFeature_RNA < 8000 & percent.mt < 10)
# QC plots
VlnPlot(merged_seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
Please use `rlang::check_installed()` instead.
FeatureScatter(merged_seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
NA
NA
pct <- merged_seurat[["pca"]]@stdev / sum(merged_seurat[["pca"]]@stdev) * 100
cumu <- cumsum(pct)
co1 <- which(cumu > 90 & pct < 5)[1]
co2 <- sort(which((pct[-length(pct)] - pct[-1]) > 0.1), decreasing = TRUE)[1] + 1
min_pc <- min(co1, co2)
plot_df <- data.frame(pct = pct, cumu = cumu, rank = 1:length(pct))
ggplot(plot_df, aes(x = rank, y = pct)) +
geom_col() +
geom_point(aes(y = cumu), color = "red") +
geom_vline(xintercept = min_pc, linetype = "dashed", color = "blue") +
labs(title = "PCA Explained Variance", x = "PC", y = "% Variance")
min_pc
[1] 16
# Run Harmony integration if needed (optional)
# merged_seurat <- RunHarmony(merged_seurat, group.by.vars = "sample_id")
merged_seurat <- RunUMAP(merged_seurat, dims = 1:min_pc)
17:10:08 UMAP embedding parameters a = 0.9922 b = 1.112
17:10:08 Read 30345 rows and found 16 numeric columns
17:10:08 Using Annoy for neighbor search, n_neighbors = 30
17:10:08 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:10:10 Writing NN index file to temp file /tmp/RtmpwTyIcm/file6d1a2200108e6
17:10:10 Searching Annoy index using 1 thread, search_k = 3000
17:10:17 Annoy recall = 100%
17:10:17 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
17:10:19 Initializing from normalized Laplacian + noise (using RSpectra)
17:10:20 Commencing optimization for 200 epochs, with 1298492 positive edges
17:10:20 Using rng type: pcg
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:10:30 Optimization finished
merged_seurat <- FindNeighbors(merged_seurat, dims = 1:min_pc)
Computing nearest neighbor graph
Computing SNN
merged_seurat <- FindClusters(merged_seurat, resolution = 0.2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 30345
Number of edges: 1096561
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9693
Number of communities: 20
Elapsed time: 4 seconds
# UMAP
DimPlot(merged_seurat, group.by = "condition", label = TRUE, repel = TRUE)
DimPlot(merged_seurat, group.by = "tissue", label = TRUE, repel = TRUE)
DimPlot(merged_seurat, label = TRUE)
DoHeatmap(merged_seurat, features = top10$gene) + NoLegend()
Warning: The following features were omitted as they were not found in the scale.data slot for the RNA assay: CA1, AHSP, SLC4A1, ALAS2, AC114752.3, RP11-501J20.5, ITGB3, IGHGP, CLEC2A, KLK8, GRHL3, CPXM2, SV2A, CTD-2006K23.1, MME, WDR86, CD177, NCAM1, SFTPC, HLX, NRG1, OXNAD1
top_50_up <- read.csv("../Gaydosik_Paper_Data_3_SS_Patients/top_50_upregulated.csv") # or read.delim("top_50_up.tsv")
top_50_down <- read.csv("../Gaydosik_Paper_Data_3_SS_Patients/top_50_downregulated.csv")
FeaturePlot(merged_seurat,
features = top_50_up$gene[1:10],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: PCLAF
FeaturePlot(merged_seurat,
features = top_50_up$gene[11:20],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(merged_seurat,
features = top_50_up$gene[21:30],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(merged_seurat,
features = top_50_up$gene[31:40],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(merged_seurat,
features = top_50_up$gene[41:50],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(merged_seurat,
features = top_50_down$gene[1:10],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: PCED1B-AS1, SNHG5
FeaturePlot(merged_seurat,
features = top_50_down$gene[11:20],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: RIPOR2
FeaturePlot(merged_seurat,
features = top_50_down$gene[21:30],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(merged_seurat,
features = top_50_down$gene[31:40],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: LINC01578
FeaturePlot(merged_seurat,
features = top_50_down$gene[41:50],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: AL138963.4
DimPlot(merged_seurat, group.by = "seurat_clusters", label = T, label.box = T, repel = T, reduction = "umap")
NA
NA
# Vector of genes to plot
up_genes <- c("CLIC1", "COX5A","GTSF1", "MAD2L1","MYBL2","MYL6B","NME1","PLK1", "PYCR1", "SLC25A5", "SRI", "TUBA1C", "UBE2T", "YWHAH")
# DotPlot with custom firebrick-red gradient
DotPlot(merged_seurat, features = up_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
# DotPlot with custom firebrick-red gradient
DotPlot(merged_seurat, features = up_genes, group.by = "sample_id") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
# DotPlot with custom firebrick-red gradient
DotPlot(merged_seurat, features = up_genes, group.by = "tissue") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Warning: Scaling data with a low number of groups may produce misleading resultsScale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
# DotPlot with custom firebrick-red gradient
DotPlot(merged_seurat, features = up_genes, group.by = "condition") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Warning: Scaling data with a low number of groups may produce misleading resultsScale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
# Downregulated genes
down_genes <- c("TXNIP", "RASA3", "RIPOR2",
"ZFP36", "ZFP36L1", "ZFP36L2",
"PRMT2", "MAX", "PIK3IP1",
"BTG1", "CDKN1B")
# DotPlot with firebrick color for high expression
DotPlot(merged_seurat, features = down_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Warning: The following requested variables were not found: RIPOR2Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
# DotPlot with firebrick color for high expression
DotPlot(merged_seurat, features = down_genes, group.by = "sample_id") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Warning: The following requested variables were not found: RIPOR2Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
# DotPlot with firebrick color for high expression
DotPlot(merged_seurat, features = down_genes, group.by = "tissue") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Warning: The following requested variables were not found: RIPOR2Warning: Scaling data with a low number of groups may produce misleading resultsScale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
# DotPlot with firebrick color for high expression
DotPlot(merged_seurat, features = down_genes, group.by = "condition") +
RotatedAxis() +
scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Warning: The following requested variables were not found: RIPOR2Warning: Scaling data with a low number of groups may produce misleading resultsScale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
library(Seurat)
library(dplyr)
library(tibble)
# Set Idents to sample_id for DE grouping
Idents(merged_seurat) <- "sample_id"
# Define healthy reference groups
healthy_blood <- "Healthy_Blood"
healthy_skin <- "Healthy_Skin"
# Get all sample_ids
all_samples <- unique(merged_seurat$sample_id)
# Extract SS blood and skin samples by pattern
ss_blood_samples <- grep("^SS[0-9]+_Blood$", all_samples, value = TRUE)
ss_skin_samples <- grep("^SS[0-9]+_Skin$", all_samples, value = TRUE)
# Function to run DE and save results
run_DE_and_save <- function(case_sample, control_sample, assay_name = "RNA") {
message("Running DE: ", case_sample, " vs ", control_sample)
# Run DE using Wilcox test (can change to MAST if needed)
markers <- FindMarkers(
object = merged_seurat,
ident.1 = case_sample,
ident.2 = control_sample,
assay = assay_name,
logfc.threshold = 0,
min.pct = 0,
test.use = "wilcox"
)
# Expression matrix for mean expression calculations
expr_data <- GetAssayData(merged_seurat, assay = assay_name, slot = "data")
# Cells in each group
case_cells <- WhichCells(merged_seurat, idents = case_sample)
control_cells <- WhichCells(merged_seurat, idents = control_sample)
# Add mean expression and manual log2FC
markers <- markers %>%
rownames_to_column("gene") %>%
mutate(
mean_expr_case = rowMeans(expr_data[, case_cells, drop = FALSE], na.rm = TRUE)[gene],
mean_expr_control = rowMeans(expr_data[, control_cells, drop = FALSE], na.rm = TRUE)[gene],
log2FC_manual = log2(mean_expr_case + 1) - log2(mean_expr_control + 1)
)
# Write CSV output
filename <- paste0("DE_", case_sample, "_vs_", control_sample, ".csv")
write.csv(markers, filename, row.names = FALSE)
message("Saved DE to ", filename)
return(markers)
}
# Run DE for all SS Blood samples vs Healthy_Blood
for (ss_blood in ss_blood_samples) {
run_DE_and_save(ss_blood, healthy_blood)
}
Running DE: SS1_Blood vs Healthy_Blood
Saved DE to DE_SS1_Blood_vs_Healthy_Blood.csv
Running DE: SS2_Blood vs Healthy_Blood
Saved DE to DE_SS2_Blood_vs_Healthy_Blood.csv
Running DE: SS3_Blood vs Healthy_Blood
Saved DE to DE_SS3_Blood_vs_Healthy_Blood.csv
Running DE: SS4_Blood vs Healthy_Blood
Saved DE to DE_SS4_Blood_vs_Healthy_Blood.csv
Running DE: SS5_Blood vs Healthy_Blood
Saved DE to DE_SS5_Blood_vs_Healthy_Blood.csv
Running DE: SS6_Blood vs Healthy_Blood
Saved DE to DE_SS6_Blood_vs_Healthy_Blood.csv
# Run DE for all SS Skin samples vs Healthy_Skin
for (ss_skin in ss_skin_samples) {
run_DE_and_save(ss_skin, healthy_skin)
}
Running DE: SS1_Skin vs Healthy_Skin
Saved DE to DE_SS1_Skin_vs_Healthy_Skin.csv
Running DE: SS2_Skin vs Healthy_Skin
Saved DE to DE_SS2_Skin_vs_Healthy_Skin.csv
Running DE: SS3_Skin vs Healthy_Skin
Saved DE to DE_SS3_Skin_vs_Healthy_Skin.csv
Running DE: SS4_Skin vs Healthy_Skin
Saved DE to DE_SS4_Skin_vs_Healthy_Skin.csv
# Create a new group column to classify each cell
merged_seurat$DE_group <- case_when(
grepl("^SS[0-9]+_Blood$", merged_seurat$sample_id) ~ "SS_Blood",
merged_seurat$sample_id == "Healthy_Blood" ~ "Healthy_Blood",
grepl("^SS[0-9]+_Skin$", merged_seurat$sample_id) ~ "SS_Skin",
merged_seurat$sample_id == "Healthy_Skin" ~ "Healthy_Skin",
TRUE ~ NA_character_
)
# Check counts to ensure assignment worked
table(merged_seurat$DE_group)
Healthy_Blood Healthy_Skin SS_Blood SS_Skin
4386 33 19543 1210
Idents(merged_seurat) <- "DE_group"
# Function to perform grouped DE and save
run_grouped_DE <- function(group1, group2, assay_name = "RNA") {
message("Running DE: ", group1, " vs ", group2)
markers <- FindMarkers(
object = merged_seurat,
ident.1 = group1,
ident.2 = group2,
assay = assay_name,
logfc.threshold = 0,
min.pct = 0,
test.use = "wilcox"
)
expr_data <- GetAssayData(merged_seurat, assay = assay_name, slot = "data")
# Get cell names in each group
group1_cells <- WhichCells(merged_seurat, idents = group1)
group2_cells <- WhichCells(merged_seurat, idents = group2)
# Add expression summaries
markers <- markers %>%
rownames_to_column("gene") %>%
mutate(
mean_expr_group1 = rowMeans(expr_data[, group1_cells, drop = FALSE], na.rm = TRUE)[gene],
mean_expr_group2 = rowMeans(expr_data[, group2_cells, drop = FALSE], na.rm = TRUE)[gene],
log2FC_manual = log2(mean_expr_group1 + 1) - log2(mean_expr_group2 + 1)
)
# Write to CSV
filename <- paste0("Grouped_DE_", group1, "_vs_", group2, ".csv")
write.csv(markers, filename, row.names = FALSE)
message("Saved DE to ", filename)
return(markers)
}
# Run DE for SS_Blood vs Healthy_Blood
de_blood <- run_grouped_DE("SS_Blood", "Healthy_Blood")
Running DE: SS_Blood vs Healthy_Blood
Saved DE to Grouped_DE_SS_Blood_vs_Healthy_Blood.csv
# Run DE for SS_Skin vs Healthy_Skin
de_skin <- run_grouped_DE("SS_Skin", "Healthy_Skin")
Running DE: SS_Skin vs Healthy_Skin
Saved DE to Grouped_DE_SS_Skin_vs_Healthy_Skin.csv
saveRDS(merged_seurat, file = "Herrara_All_samples.rds")