Dataset: GSE146586 (Borcherding et al., 2023),
https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE146586
The dataset includes scRNA-seq and paired TCR-seq from Sézary syndrome (SS)
patients and one healthy control. The TCR data are used to identify the
dominant malignant clone.
# Base directory
# Root folder holding the integrated Seurat object and the TCR contig files
base_dir <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Final_Year_Results/2025-Year3_Analysis/Biomarkers_Validation_with_Public_Data/Borch_Combined/data/"

# Inputs: TCR contig annotations (sub-folder) and the integrated Seurat object
tcr_dir <- file.path(base_dir, "TCR_Data")
seurat_file <- file.path(
  base_dir,
  "ss_Borcherding_Malignant_6_Normal_1_Integrated_object.rds"
)

# Output folder, relative to the working directory; no warning if it exists
output_dir <- "Borcherding_output"
dir.create(output_dir, showWarnings = FALSE)
# Progress message printed before the Seurat object is read from disk
cat("Loading Seurat object...\n")
Loading Seurat object...
# Read the serialized Seurat object from `seurat_file`; every later step
# works on (or copies) `merged_seurat`
merged_seurat <- readRDS(seurat_file)
# Header line for the dimension summary printed next
cat("Dataset dimensions:\n")
Dataset dimensions:
# Number of cells = number of columns of the Seurat object
cat(sprintf(" - Cells: %d\n", ncol(merged_seurat)))
- Cells: 51095
# Number of genes (features) = number of rows of the Seurat object
cat(sprintf(" - Genes: %d\n", nrow(merged_seurat)))
- Genes: 19184
# Distinct values of the Sample_ID metadata column, comma-separated
cat(sprintf(" - Samples: %s\n", paste(unique(merged_seurat$Sample_ID), collapse = ", ")))
- Samples: SS, CTRL
library(tidyr)
library(dplyr)
library(readr)
library(stringr)
# Define your directory path here
# tcr_dir <- "path/to/tcr_files"
# Gzipped contig-annotation file for each TCR sample, keyed by sample label
tcr_file_names <- c(
  SS2 = "GSM4396048_CTCL2_filtered_contig_annotations.csv.gz",
  SS3 = "GSM4396049_CTCL3_filtered_contig_annotations.csv.gz",
  SS4 = "GSM4396050_CTCL4_filtered_contig_annotations.csv.gz",
  SS5 = "GSM4396051_CTCL5_filtered_contig_annotations.csv.gz",
  SS6 = "GSM4396052_CTCL6_filtered_contig_annotations.csv.gz"
)
# Named list (sample label -> full path under tcr_dir); lapply() keeps the names
tcr_files <- lapply(tcr_file_names, function(file_name) {
  file.path(tcr_dir, file_name)
})
cat("Loading gzipped TCR clonotype files:\n")
Loading gzipped TCR clonotype files:
# Load each sample's contig annotations, keep productive TCR chains only, tag
# every row with its sample label and strip the trailing "-1" from barcodes.
# Returns an unnamed list with one tibble per entry of `tcr_files`.
tcr_list <- lapply(names(tcr_files), function(sample_name) {
  cat("Reading sample:", sample_name, "\n")
  contigs <- read_csv(
    tcr_files[[sample_name]],
    # Pin `productive` to character: with type guessing a file containing only
    # True/False would be parsed as logical, the string comparison below would
    # then match nothing, and bind_rows() could not combine mixed column types.
    col_types = cols(productive = col_character()),
    show_col_types = FALSE
  )
  contigs %>%
    # Case-insensitive so both "True" and "true" spellings are accepted;
    # rows with any other value (including NA) are dropped.
    filter(toupper(productive) == "TRUE") %>%
    mutate(
      sample_id = sample_name,
      barcode = sub("-1$", "", barcode) # Remove "-1" suffix if present
    )
})
Reading sample: SS2
Reading sample: SS3
Reading sample: SS4
Reading sample: SS5
Reading sample: SS6
# Combine all samples into one dataframe (rows stacked in the order of
# tcr_list; the sample_id column records which sample each row came from)
combined_tcr <- bind_rows(tcr_list)
# Header line for the column listing printed next
cat("Columns in combined TCR data:\n")
Columns in combined TCR data:
# Column names of the combined TCR table
print(colnames(combined_tcr))
[1] "barcode" "is_cell" "contig_id" "high_confidence" "length"
[6] "chain" "v_gene" "d_gene" "j_gene" "c_gene"
[11] "full_length" "productive" "cdr3" "cdr3_nt" "reads"
[16] "umis" "raw_clonotype_id" "raw_consensus_id" "sample_id"
# One row per productive contig (chain), so a cell can contribute several rows
cat("Total productive TCR chains across all samples:", nrow(combined_tcr), "\n")
Total productive TCR chains across all samples: 74426
# Header line for the barcode preview printed next
cat("Sample barcodes (first 10):\n")
Sample barcodes (first 10):
# First ten barcodes after the "-1" suffix was stripped
print(head(combined_tcr$barcode, 10))
[1] "AAACCTGAGACAAAGG" "AAACCTGAGAGTCTGG" "AAACCTGAGAGTCTGG" "AAACCTGAGAGTGAGA" "AAACCTGAGAGTGAGA"
[6] "AAACCTGAGCCACCTG" "AAACCTGAGCCACCTG" "AAACCTGAGCCACGTC" "AAACCTGAGCCACGTC" "AAACCTGAGCCATCGC"
# Lookup table: TCR sample label (names) -> cell-name prefix used in the
# Seurat object (values)
prefix_map <- setNames(
  c("SS_P2", "SS_P3", "SS_P4", "SS_P5", "SS_P6"),
  c("SS2", "SS3", "SS4", "SS5", "SS6")
)
#' Convert TCR barcodes to Seurat-style cell names
#'
#' Builds `<prefix>_<cell barcode>-1`, where `<prefix>` is looked up from the
#' sample label. A leading `<sample>_sample_id_` on the barcode is removed
#' first if present. Vectorised over `barcode` and `sample`.
#'
#' @param barcode Character vector of cell barcodes (without "-1" suffix).
#' @param sample Character vector of TCR sample labels (e.g. "SS2"); recycled
#'   against `barcode`.
#' @param mapping Named character vector, sample label -> Seurat prefix.
#'   Defaults to the script-level `prefix_map`.
#' @return Unnamed character vector of Seurat-style cell names.
#' @details Stops with an error when a sample label has no entry in `mapping`;
#'   otherwise the result would silently start with "NA_" and never match a
#'   cell in the Seurat object.
convert_tcr_barcode <- function(barcode, sample, mapping = prefix_map) {
  barcode <- as.character(barcode)
  sample <- as.character(sample)

  unmapped <- setdiff(unique(sample), names(mapping))
  if (length(unmapped) > 0) {
    stop(
      "No Seurat prefix defined for sample(s): ",
      paste(unmapped, collapse = ", "),
      call. = FALSE
    )
  }

  # Literal (non-regex) prefix match, so special characters in a sample label
  # cannot change what is stripped.
  old_prefix <- paste0(sample, "_sample_id_")
  has_old_prefix <- startsWith(barcode, old_prefix) %in% TRUE
  cell_bc <- ifelse(
    has_old_prefix,
    substring(barcode, nchar(old_prefix) + 1L),
    barcode
  )

  paste0(unname(mapping[sample]), "_", cell_bc, "-1") # Add Seurat-style suffix
}
# Apply barcode transformation.
# convert_tcr_barcode() only uses vectorised operations (named-vector lookup,
# prefix removal, paste0), so it is called once on the whole columns instead of
# once per row via rowwise(); the resulting column is the same.
combined_tcr <- combined_tcr %>%
  mutate(barcode_seurat = convert_tcr_barcode(barcode, sample_id))
cat("Transformed barcodes (first 10):\n")
Transformed barcodes (first 10):
# First ten converted barcodes, in "<prefix>_<barcode>-1" form
print(head(combined_tcr$barcode_seurat, 10))
[1] "SS_P2_AAACCTGAGACAAAGG-1" "SS_P2_AAACCTGAGAGTCTGG-1" "SS_P2_AAACCTGAGAGTCTGG-1"
[4] "SS_P2_AAACCTGAGAGTGAGA-1" "SS_P2_AAACCTGAGAGTGAGA-1" "SS_P2_AAACCTGAGCCACCTG-1"
[7] "SS_P2_AAACCTGAGCCACCTG-1" "SS_P2_AAACCTGAGCCACGTC-1" "SS_P2_AAACCTGAGCCACGTC-1"
[10] "SS_P2_AAACCTGAGCCATCGC-1"
library(dplyr)
library(scRepertoire)
library(Seurat)
library(stringr)
# Mapping from TCR sample ids (names) to Seurat cell-name prefixes (values)
sample_prefix_map <- setNames(
  c("SS_P2", "SS_P3", "SS_P4", "SS_P5", "SS_P6"),
  c("SS2", "SS3", "SS4", "SS5", "SS6")
)
# combined_tcr is expected to contain:
# - barcode_seurat: converted barcode, "<prefix>_<cell barcode>-1"
# - sample_id: sample identifier like "SS2", "SS3", etc.
# - raw_clonotype_id: clonotype identifier column from the TCR contig files
#
# A sample id without a prefix mapping yields barcodes that can never match a
# Seurat cell name, so report such samples explicitly rather than trying to
# patch the barcode text afterwards.
unmapped_samples <- setdiff(
  unique(combined_tcr$sample_id),
  names(sample_prefix_map)
)
if (length(unmapped_samples) > 0) {
  warning(
    "TCR samples without a Seurat prefix mapping (their cells will not match): ",
    paste(unmapped_samples, collapse = ", "),
    call. = FALSE
  )
}
# Prepare TCR metadata: one row per distinct barcode / sample / clonotype
# combination (contig-level rows collapse here), with the converted barcode
# exposed as `barcode`.
tcr_metadata <- combined_tcr %>%
  distinct(barcode_seurat, sample_id, raw_clonotype_id) %>%
  rename(barcode = barcode_seurat)
# Check barcode matching: barcodes present both in the TCR metadata and among
# the cell names (column names) of the Seurat object
common_barcodes <- intersect(tcr_metadata$barcode, colnames(merged_seurat))
cat("Number of matching barcodes:", length(common_barcodes), "\n")
Number of matching barcodes: 36575
# Abort here when not a single TCR barcode matches a Seurat cell name, since
# attaching clonotypes would then be pointless
if (length(common_barcodes) == 0) {
stop("No matching barcodes found between TCR and Seurat object after barcode correction")
}
# Create a named vector (names = Seurat cell names) of clonotype labels.
#
# raw_clonotype_id comes from one contig file per sample, so the same id
# ("clonotype1", "clonotype2", ...) recurs in every sample and refers to a
# different clone each time. Prefix the id with sample_id so clones from
# different patients are not pooled under one label.
# NOTE(review): "None" is presumably the placeholder for contigs without an
# assigned clonotype; such rows (and NA) get no label. Confirm against the
# contig files.
has_clonotype <- !is.na(tcr_metadata$raw_clonotype_id) &
  tcr_metadata$raw_clonotype_id != "None"
clonotype_vector <- setNames(
  paste(
    tcr_metadata$sample_id[has_clonotype],
    tcr_metadata$raw_clonotype_id[has_clonotype],
    sep = "_"
  ),
  tcr_metadata$barcode[has_clonotype]
)
# Defensive: keep a single label per cell name
clonotype_vector <- clonotype_vector[!duplicated(names(clonotype_vector))]
# Add clonotype metadata to Seurat object
merged_seurat <- AddMetaData(
  object = merged_seurat,
  metadata = clonotype_vector,
  col.name = "clonotype"
)
# Inspect to confirm success: the first metadata rows should now include the
# "clonotype" column
head(merged_seurat@meta.data)
# Optional: View clonotype distribution; useNA = "ifany" also counts cells
# that have no clonotype label
table(merged_seurat$clonotype, useNA = "ifany")
clonotype1 clonotype10 clonotype100 clonotype1000 clonotype1001 clonotype1002 clonotype1003
24770 28 5 2 2 1 1
clonotype1004 clonotype1005 clonotype1006 clonotype1007 clonotype1008 clonotype1009 clonotype101
2 1 1 2 1 2 5
clonotype1010 clonotype1011 clonotype1012 clonotype1013 clonotype1014 clonotype1015 clonotype1016
2 2 2 2 2 2 2
clonotype1017 clonotype1018 clonotype1019 clonotype102 clonotype1020 clonotype1021 clonotype1022
2 2 1 5 2 2 2
clonotype1023 clonotype1024 clonotype1025 clonotype1026 clonotype1027 clonotype1028 clonotype1029
2 1 2 2 2 2 2
clonotype103 clonotype1030 clonotype1031 clonotype1032 clonotype1033 clonotype1034 clonotype1035
5 1 2 2 2 2 2
clonotype1036 clonotype1037 clonotype1038 clonotype1039 clonotype104 clonotype1040 clonotype1041
2 2 2 2 5 2 2
clonotype1042 clonotype1043 clonotype1044 clonotype1045 clonotype1046 clonotype1047 clonotype1048
2 2 2 2 2 2 2
clonotype1049 clonotype105 clonotype1050 clonotype1051 clonotype1052 clonotype1053 clonotype1054
2 5 2 1 2 2 2
clonotype1055 clonotype1056 clonotype1057 clonotype1058 clonotype1059 clonotype106 clonotype1060
2 2 2 2 2 5 2
clonotype1061 clonotype1062 clonotype1064 clonotype1066 clonotype1067 clonotype1068 clonotype1069
1 1 1 1 1 1 1
clonotype107 clonotype1070 clonotype1071 clonotype1072 clonotype1073 clonotype1074 clonotype1075
5 1 1 1 1 1 1
clonotype1076 clonotype1077 clonotype1078 clonotype1079 clonotype108 clonotype1080 clonotype1081
1 1 1 1 4 1 1
clonotype1082 clonotype1083 clonotype1084 clonotype1085 clonotype1086 clonotype1088 clonotype109
1 1 1 1 1 1 4
clonotype1090 clonotype1091 clonotype1092 clonotype1093 clonotype1094 clonotype1095 clonotype1096
1 1 1 1 1 1 1
clonotype1097 clonotype1098 clonotype1099 clonotype11 clonotype110 clonotype1100 clonotype1101
1 1 1 24 5 1 1
clonotype1102 clonotype1103 clonotype1104 clonotype1105 clonotype1106 clonotype1107 clonotype111
1 1 1 1 1 1 5
clonotype1110 clonotype1111 clonotype1112 clonotype1113 clonotype1114 clonotype1115 clonotype1117
1 1 1 1 1 1 1
clonotype1118 clonotype1119 clonotype112 clonotype1120 clonotype1121 clonotype1122 clonotype1123
1 1 4 1 1 1 1
clonotype1124 clonotype1125 clonotype1126 clonotype1127 clonotype1128 clonotype1129 clonotype113
1 1 1 1 1 1 5
clonotype1130 clonotype1131 clonotype1132 clonotype1133 clonotype1134 clonotype1135 clonotype1136
1 1 1 1 1 1 1
clonotype1137 clonotype1138 clonotype1139 clonotype114 clonotype1140 clonotype1141 clonotype1142
1 1 1 5 1 1 1
clonotype1143 clonotype1144 clonotype1145 clonotype1146 clonotype1147 clonotype1148 clonotype1149
1 1 1 1 1 1 1
clonotype115 clonotype1150 clonotype1151 clonotype1152 clonotype1153 clonotype1154 clonotype1155
5 1 1 1 1 1 1
clonotype1156 clonotype1158 clonotype1159 clonotype116 clonotype1160 clonotype1161 clonotype1162
1 1 1 5 1 1 1
clonotype1163 clonotype1164 clonotype1165 clonotype1166 clonotype1167 clonotype1168 clonotype1169
1 1 1 1 1 1 1
clonotype117 clonotype1170 clonotype1171 clonotype1172 clonotype1173 clonotype1174 clonotype1175
5 1 1 1 1 1 1
clonotype1176 clonotype1177 clonotype1178 clonotype1179 clonotype118 clonotype1180 clonotype1181
1 1 1 1 5 1 1
clonotype1182 clonotype1183 clonotype1185 clonotype1186 clonotype1187 clonotype1188 clonotype119
1 1 1 1 1 1 5
clonotype1190 clonotype1191 clonotype1192 clonotype1193 clonotype1194 clonotype1195 clonotype1196
1 1 1 1 1 1 1
clonotype1197 clonotype1198 clonotype1199 clonotype12 clonotype120 clonotype1200 clonotype1202
1 1 1 22 5 1 1
clonotype1203 clonotype1204 clonotype1205 clonotype1208 clonotype1209 clonotype121 clonotype1210
1 1 1 1 1 4 1
clonotype1211 clonotype1212 clonotype1213 clonotype1214 clonotype1215 clonotype1216 clonotype1217
1 1 1 1 1 1 1
clonotype1218 clonotype1219 clonotype122 clonotype1220 clonotype1221 clonotype1222 clonotype1223
1 1 3 1 1 1 1
clonotype1224 clonotype1225 clonotype1226 clonotype1227 clonotype1228 clonotype1229 clonotype123
1 1 1 1 1 1 3
clonotype1230 clonotype1232 clonotype1233 clonotype1234 clonotype1235 clonotype1236 clonotype1237
1 1 1 1 1 1 1
clonotype1238 clonotype1239 clonotype124 clonotype1240 clonotype1242 clonotype1243 clonotype1245
1 1 4 1 1 1 1
clonotype1246 clonotype1247 clonotype1248 clonotype1249 clonotype125 clonotype1250 clonotype1251
1 1 1 1 4 1 1
clonotype1252 clonotype1254 clonotype1255 clonotype1256 clonotype1257 clonotype1259 clonotype126
1 1 1 1 1 1 4
clonotype1260 clonotype1261 clonotype1262 clonotype1263 clonotype1264 clonotype1265 clonotype1267
1 1 1 1 1 1 1
clonotype1268 clonotype1269 clonotype127 clonotype1271 clonotype1272 clonotype1273 clonotype1274
1 1 4 1 1 1 1
clonotype1275 clonotype1277 clonotype1278 clonotype1279 clonotype128 clonotype1280 clonotype1281
1 1 1 1 4 1 1
clonotype1282 clonotype1283 clonotype1284 clonotype1285 clonotype1287 clonotype1288 clonotype1289
1 1 1 1 1 1 1
clonotype129 clonotype1290 clonotype1291 clonotype1292 clonotype1293 clonotype1294 clonotype1295
4 1 1 1 1 1 1
clonotype1296 clonotype1297 clonotype1298 clonotype1299 clonotype13 clonotype130 clonotype1301
1 1 1 1 22 3 1
clonotype1302 clonotype1303 clonotype1304 clonotype1305 clonotype1307 clonotype1308 clonotype1309
1 1 1 1 1 1 1
clonotype131 clonotype1310 clonotype1311 clonotype1312 clonotype1313 clonotype1314 clonotype1315
4 1 1 1 1 1 1
clonotype1316 clonotype1319 clonotype132 clonotype1321 clonotype1322 clonotype1323 clonotype1324
1 1 4 1 1 1 1
clonotype1325 clonotype1326 clonotype1327 clonotype1328 clonotype1329 clonotype133 clonotype1330
1 1 1 1 1 4 1
clonotype1331 clonotype1332 clonotype1334 clonotype1335 clonotype1336 clonotype1337 clonotype1338
1 1 1 1 1 1 1
clonotype1339 clonotype134 clonotype1340 clonotype1341 clonotype1342 clonotype1343 clonotype1344
1 4 1 1 1 1 1
clonotype1345 clonotype1346 clonotype1347 clonotype1348 clonotype1349 clonotype135 clonotype1350
1 1 1 1 1 4 1
clonotype1351 clonotype1352 clonotype1353 clonotype1355 clonotype1356 clonotype1357 clonotype1358
1 1 1 1 1 1 1
clonotype1359 clonotype136 clonotype1360 clonotype1361 clonotype1362 clonotype1363 clonotype1364
1 4 1 1 1 1 1
clonotype1366 clonotype1368 clonotype1369 clonotype137 clonotype1370 clonotype1371 clonotype1372
1 1 1 4 1 1 1
clonotype1373 clonotype1374 clonotype1375 clonotype1376 clonotype1377 clonotype1378 clonotype1379
1 1 1 1 1 1 1
clonotype138 clonotype1380 clonotype1381 clonotype1382 clonotype1383 clonotype1384 clonotype1385
4 1 1 1 1 1 1
clonotype1386 clonotype1387 clonotype1388 clonotype139 clonotype1391 clonotype1392 clonotype1393
1 1 1 4 1 1 1
clonotype1394 clonotype1395 clonotype1396 clonotype1397 clonotype1398 clonotype1399 clonotype14
1 1 1 1 1 1 18
clonotype140 clonotype1400 clonotype1401 clonotype1402 clonotype1403 clonotype1404 clonotype1406
4 1 1 1 1 1 1
clonotype1407 clonotype1408 clonotype1409 clonotype141 clonotype1410 clonotype1411 clonotype1412
1 1 1 4 1 1 1
clonotype1413 clonotype1414 clonotype1416 clonotype1418 clonotype1419 clonotype142 clonotype1421
1 1 1 1 1 4 1
clonotype1422 clonotype1423 clonotype1424 clonotype1425 clonotype1426 clonotype1427 clonotype1428
1 1 1 1 1 1 1
clonotype1429 clonotype143 clonotype1431 clonotype1432 clonotype1433 clonotype1434 clonotype1435
1 4 1 1 1 1 1
clonotype1437 clonotype1438 clonotype1439 clonotype144 clonotype1440 clonotype1441 clonotype1442
1 1 1 3 1 1 1
clonotype1443 clonotype1444 clonotype1445 clonotype1446 clonotype1447 clonotype1448 clonotype145
1 1 1 1 1 1 4
clonotype1450 clonotype1451 clonotype1452 clonotype1453 clonotype1454 clonotype1455 clonotype1456
1 1 1 1 1 1 1
clonotype1457 clonotype1458 clonotype1459 clonotype146 clonotype1460 clonotype1461 clonotype1462
1 1 1 4 1 1 1
clonotype1463 clonotype1464 clonotype1465 clonotype1466 clonotype1467 clonotype1468 clonotype1469
1 1 1 1 1 1 1
clonotype147 clonotype1470 clonotype1471 clonotype1472 clonotype1474 clonotype1475 clonotype1476
3 1 1 1 1 1 1
clonotype1477 clonotype1478 clonotype1479 clonotype148 clonotype1480 clonotype1481 clonotype1482
1 1 1 4 1 1 1
clonotype1483 clonotype1484 clonotype1485 clonotype1486 clonotype1488 clonotype1489 clonotype149
1 1 1 1 1 1 4
clonotype1490 clonotype1492 clonotype1493 clonotype1494 clonotype1495 clonotype1496 clonotype1497
1 1 1 1 1 1 1
clonotype1498 clonotype15 clonotype150 clonotype1500 clonotype1501 clonotype1502 clonotype1504
1 18 4 1 1 1 1
clonotype1505 clonotype1506 clonotype1507 clonotype1508 clonotype1509 clonotype151 clonotype1510
1 1 1 1 1 4 1
clonotype1511 clonotype1512 clonotype1513 clonotype1514 clonotype1515 clonotype1516 clonotype1517
1 1 1 1 1 1 1
clonotype1518 clonotype1519 clonotype152 clonotype1520 clonotype1521 clonotype1522 clonotype1523
1 1 4 1 1 1 1
clonotype1524 clonotype1525 clonotype1526 clonotype1527 clonotype1528 clonotype1529 clonotype153
1 1 1 1 1 1 4
clonotype1530 clonotype1531 clonotype1532 clonotype1533 clonotype1534 clonotype1535 clonotype1536
1 1 1 1 1 1 1
clonotype1537 clonotype1538 clonotype1539 clonotype154 clonotype1540 clonotype1541 clonotype1542
1 1 1 4 1 1 1
clonotype1543 clonotype1544 clonotype1545 clonotype1546 clonotype1547 clonotype1548 clonotype1549
1 1 1 1 1 1 1
clonotype155 clonotype1550 clonotype1551 clonotype1552 clonotype1554 clonotype1555 clonotype1556
4 1 1 1 1 1 1
clonotype1557 clonotype1558 clonotype156 clonotype1560 clonotype1561 clonotype1562 clonotype1563
1 1 4 1 1 1 1
clonotype1564 clonotype1565 clonotype1566 clonotype1568 clonotype1569 clonotype157 clonotype1570
1 1 1 1 1 4 1
clonotype1571 clonotype1572 clonotype1573 clonotype1574 clonotype1575 clonotype1576 clonotype1577
1 1 1 1 1 1 1
clonotype1578 clonotype1579 clonotype158 clonotype1580 clonotype1581 clonotype1582 clonotype1583
1 1 4 1 1 1 1
clonotype1584 clonotype1585 clonotype1586 clonotype1587 clonotype1588 clonotype1589 clonotype159
1 1 1 1 1 1 4
clonotype1590 clonotype1591 clonotype1592 clonotype1593 clonotype1594 clonotype1595 clonotype1596
1 1 1 1 1 1 1
clonotype1597 clonotype1598 clonotype16 clonotype160 clonotype1600 clonotype1601 clonotype161
1 1 15 3 1 1 4
clonotype162 clonotype163 clonotype164 clonotype165 clonotype166 clonotype167 clonotype168
3 4 3 4 3 3 4
clonotype169 clonotype17 clonotype170 clonotype171 clonotype172 clonotype173 clonotype174
4 15 4 4 4 3 4
clonotype175 clonotype176 clonotype177 clonotype178 clonotype179 clonotype18 clonotype180
3 4 4 4 4 13 4
clonotype181 clonotype182 clonotype183 clonotype184 clonotype185 clonotype186 clonotype187
4 4 4 4 4 4 4
clonotype188 clonotype189 clonotype19 clonotype190 clonotype191 clonotype192 clonotype193
4 4 14 4 2 4 4
clonotype194 clonotype195 clonotype196 clonotype197 clonotype198 clonotype199 clonotype2
4 4 3 4 4 4 7241
clonotype20 clonotype200 clonotype201 clonotype202 clonotype203 clonotype204 clonotype205
14 4 4 4 4 4 4
clonotype206 clonotype207 clonotype208 clonotype209 clonotype21 clonotype210 clonotype211
4 3 4 4 14 4 4
clonotype212 clonotype213 clonotype214 clonotype215 clonotype216 clonotype217 clonotype218
4 4 4 4 4 3 4
clonotype219 clonotype22 clonotype220 clonotype221 clonotype222 clonotype223 clonotype224
4 11 4 4 4 3 4
clonotype225 clonotype226 clonotype227 clonotype228 clonotype229 clonotype23 clonotype230
4 4 4 4 3 12 4
clonotype231 clonotype232 clonotype233 clonotype234 clonotype235 clonotype236 clonotype237
3 3 4 4 3 4 4
clonotype238 clonotype239 clonotype24 clonotype240 clonotype241 clonotype242 clonotype243
4 4 10 4 4 4 4
clonotype244 clonotype245 clonotype246 clonotype247 clonotype248 clonotype249 clonotype25
4 4 4 4 4 4 12
clonotype250 clonotype251 clonotype252 clonotype253 clonotype254 clonotype255 clonotype256
4 4 4 4 4 3 4
clonotype257 clonotype258 clonotype259 clonotype26 clonotype260 clonotype261 clonotype262
4 3 3 10 3 2 3
clonotype263 clonotype264 clonotype265 clonotype266 clonotype267 clonotype268 clonotype269
3 3 3 3 2 3 3
clonotype27 clonotype270 clonotype271 clonotype272 clonotype273 clonotype274 clonotype275
10 3 2 2 3 3 3
clonotype276 clonotype277 clonotype278 clonotype279 clonotype28 clonotype280 clonotype281
3 2 2 3 9 3 3
clonotype282 clonotype283 clonotype284 clonotype285 clonotype286 clonotype287 clonotype288
2 2 3 3 3 2 3
clonotype289 clonotype29 clonotype290 clonotype291 clonotype292 clonotype293 clonotype294
3 9 3 3 3 3 2
clonotype295 clonotype296 clonotype297 clonotype298 clonotype299 clonotype3 clonotype30
2 3 3 3 2 221 9
clonotype300 clonotype301 clonotype302 clonotype303 clonotype304 clonotype305 clonotype306
3 3 3 3 3 3 3
clonotype307 clonotype308 clonotype309 clonotype31 clonotype310 clonotype311 clonotype312
3 2 3 8 3 3 2
clonotype313 clonotype314 clonotype315 clonotype316 clonotype317 clonotype318 clonotype319
3 2 3 3 3 3 2
clonotype32 clonotype320 clonotype321 clonotype322 clonotype323 clonotype324 clonotype325
9 3 3 2 3 3 3
clonotype326 clonotype327 clonotype328 clonotype329 clonotype33 clonotype330 clonotype331
3 2 3 3 8 3 3
clonotype332 clonotype333 clonotype334 clonotype335 clonotype336 clonotype337 clonotype338
3 3 2 3 3 3 2
clonotype339 clonotype34 clonotype340 clonotype341 clonotype342 clonotype343 clonotype344
3 9 3 1 3 2 3
clonotype345 clonotype346 clonotype347 clonotype348 clonotype349 clonotype35 clonotype350
3 3 2 3 3 8 2
clonotype351 clonotype352 clonotype353 clonotype354 clonotype355 clonotype356 clonotype357
3 3 2 3 3 3 3
clonotype358 clonotype359 clonotype36 clonotype360 clonotype361 clonotype362 clonotype363
3 2 8 2 3 3 3
clonotype364 clonotype365 clonotype366 clonotype367 clonotype368 clonotype369 clonotype37
2 3 2 3 3 3 8
clonotype370 clonotype371 clonotype372 clonotype373 clonotype374 clonotype375 clonotype376
2 2 2 3 2 3 3
clonotype377 clonotype378 clonotype379 clonotype38 clonotype380 clonotype381 clonotype382
3 2 3 7 3 3 3
clonotype383 clonotype384 clonotype385 clonotype386 clonotype387 clonotype388 clonotype389
3 3 3 3 3 2 3
clonotype39 clonotype390 clonotype391 clonotype392 clonotype393 clonotype394 clonotype395
7 3 3 3 3 3 3
clonotype396 clonotype397 clonotype398 clonotype399 clonotype4 clonotype40 clonotype400
3 3 2 2 87 6 2
clonotype401 clonotype402 clonotype403 clonotype404 clonotype405 clonotype406 clonotype407
1 3 3 3 3 2 2
clonotype408 clonotype409 clonotype41 clonotype410 clonotype411 clonotype412 clonotype413
3 3 7 3 3 1 3
clonotype414 clonotype415 clonotype416 clonotype417 clonotype418 clonotype419 clonotype42
3 3 2 3 3 2 7
clonotype420 clonotype421 clonotype422 clonotype423 clonotype424 clonotype425 clonotype426
3 3 3 3 3 3 3
clonotype427 clonotype428 clonotype429 clonotype43 clonotype430 clonotype431 clonotype432
2 3 2 7 3 3 3
clonotype433 clonotype434 clonotype435 clonotype436 clonotype437 clonotype438 clonotype439
2 3 3 2 2 3 3
clonotype44 clonotype440 clonotype441 clonotype442 clonotype443 clonotype444 clonotype445
7 3 3 3 3 3 3
clonotype446 clonotype447 clonotype448 clonotype449 clonotype45 clonotype450 clonotype451
3 3 3 3 6 3 3
clonotype452 clonotype453 clonotype454 clonotype455 clonotype456 clonotype457 clonotype458
3 3 2 3 1 3 2
clonotype459 clonotype46 clonotype460 clonotype461 clonotype462 clonotype463 clonotype464
3 7 3 3 3 3 3
clonotype465 clonotype466 clonotype467 clonotype468 clonotype469 clonotype47 clonotype470
3 3 3 3 3 7 3
clonotype471 clonotype472 clonotype473 clonotype474 clonotype475 clonotype476 clonotype477
3 3 3 3 3 1 2
clonotype478 clonotype479 clonotype48 clonotype480 clonotype481 clonotype482 clonotype483
2 2 6 3 3 3 3
clonotype484 clonotype485 clonotype486 clonotype487 clonotype488 clonotype489 clonotype49
3 3 3 3 3 3 7
clonotype490 clonotype491 clonotype492 clonotype493 clonotype494 clonotype495 clonotype496
2 3 3 3 3 3 3
clonotype497 clonotype498 clonotype499 clonotype5 clonotype50 clonotype500
3 2 2 77 6 3
[ reached 'max' / getOption("max.print") -- omitted 554 entries ]
# Since your Seurat metadata Sample_ID only contains "SS" and "CTRL",
# subset malignant cells only within the "SS" samples (Sézary syndrome samples)
ss_samples <- c("SS")
# Minimum number of cells for a clonotype to count as expanded
min_clone_size <- 5
malignant_cells_list <- list()
for (s in ss_samples) {
  # Subset Seurat object to the sample of interest
  sample_cells <- subset(merged_seurat, subset = Sample_ID == s)
  # Check that clonotype metadata exists and is not empty
  if (!"clonotype" %in% colnames(sample_cells@meta.data)) next
  if (length(sample_cells$clonotype) == 0) next
  # Count clonotype frequencies (table() ignores cells without a clonotype)
  clonotype_counts <- table(sample_cells$clonotype)
  # Identify expanded clones with clone size >= min_clone_size cells
  expanded_clones <- names(clonotype_counts[clonotype_counts >= min_clone_size])
  if (length(expanded_clones) == 0) next
  # Subset cells belonging to expanded clonotypes (putative malignant cells)
  malignant <- subset(sample_cells, subset = clonotype %in% expanded_clones)
  # Add to list if nonempty
  if (ncol(malignant) > 0) malignant_cells_list[[s]] <- malignant
}
# Remove null elements and unname list.
# vapply() always returns a logical vector; sapply() returns an empty list for
# an empty input, which makes `!` fail with "invalid argument type".
is_missing <- vapply(malignant_cells_list, is.null, logical(1))
malignant_cells_list <- unname(malignant_cells_list[!is_missing])
# Fail with a clear message rather than passing NULL on to Idents<-
if (length(malignant_cells_list) == 0) {
  stop(
    "No expanded clonotypes (>= ", min_clone_size,
    " cells) found in samples: ", paste(ss_samples, collapse = ", "),
    call. = FALSE
  )
}
# Merge all malignant Sezary cells across samples into one Seurat object
sezary_cells <- Reduce(function(x, y) merge(x, y), malignant_cells_list)
# NOTE(review): no "sample" metadata column is created in the visible code
# (only Sample_ID, orig.ident and clonotype appear); confirm this name.
Idents(sezary_cells) <- "sample"
library(scRepertoire)
library(Seurat)
# Both calls below group cells by the "clonotype" metadata column of
# sezary_cells (the putative malignant subset built above).
# NOTE(review): the rendered output for this chunk shows only NA; confirm that
# these functions accept a custom metadata column as cloneCall here.
# ================================
# Clonal homeostasis (size distribution)
# ================================
clonalHomeostasis(sezary_cells, cloneCall = "clonotype")
# ================================
# Clonal proportion / relative abundance
# ================================
clonalProportion(sezary_cells, cloneCall = "clonotype")
NA
NA
# ================================
# Add tissue grouping column
# ================================
# Label each cell from its orig.ident: "HC" for the control sample, "SS" for
# the five patient samples, "Other" for anything else.
ss_vs_hc$tissue_group <- ifelse(
  ss_vs_hc$orig.ident %in% c("Control"),
  "HC", # assuming Control is blood
  ifelse(
    ss_vs_hc$orig.ident %in% c("SS_P2", "SS_P3", "SS_P4", "SS_P5", "SS_P6"),
    "SS", # or Skin if known
    "Other"
  )
)
# Quick check: cell counts per group
table(ss_vs_hc$tissue_group)
HC SS
4437 33393
# ================================
# Visualize CLIC1 expression
# ================================
# FeaturePlot with red gradient
# (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
FeaturePlot(ss_vs_hc, features = "CLIC1", reduction = "umap",
            cols = c("grey90", "red"), label = TRUE, repel = TRUE)
# DotPlot by Sample_ID with red-grey gradient
DotPlot(ss_vs_hc, features = "CLIC1", group.by = "Sample_ID") +
  RotatedAxis() +
  scale_color_gradient(low = "grey90", high = "red") +
  ggtitle("CLIC1 Expression by Sample") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
    axis.text.y = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
  )
# DotPlot by orig.ident with red-grey gradient
# NOTE(review): the title says "by Tissue" but the grouping is orig.ident, not
# the tissue_group column; confirm which grouping is intended.
DotPlot(ss_vs_hc, features = "CLIC1", group.by = "orig.ident") +
  RotatedAxis() +
  scale_color_gradient(low = "grey90", high = "red") +
  ggtitle("CLIC1 Expression by Tissue") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
    axis.text.y = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
  )
# ================================
# Visualize CLIC1
# ================================
# The plots below repeat the ones above with default styling.
# FeaturePlot
FeaturePlot(ss_vs_hc, features = "CLIC1", reduction = "umap")
# DotPlot grouped by Sample_ID
DotPlot(ss_vs_hc, features = "CLIC1", group.by = "Sample_ID") + RotatedAxis()
# DotPlot grouped by orig.ident
DotPlot(ss_vs_hc, features = "CLIC1", group.by = "orig.ident") + RotatedAxis()
# DimPlot by sample
DimPlot(ss_vs_hc, group.by = "Sample_ID", reduction = "umap")
# FeaturePlot for CLIC1
FeaturePlot(ss_vs_hc, features = "CLIC1", reduction = "umap")
# DotPlot for CLIC1 across groups
DotPlot(ss_vs_hc, features = "CLIC1", group.by = "Sample_ID") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# FeaturePlot with red gradient
FeaturePlot(ss_vs_hc, features = "CLIC1", reduction = "umap",
            cols = c("grey90", "red"), label = TRUE, repel = TRUE)
top_50_up <- read.csv("top_50_upregulated.csv") # or read.delim("top_50_up.tsv")
top_50_down <- read.csv("top_50_downregulated.csv")

#' Draw UMAP FeaturePlots for a gene list, `batch_size` genes per figure
#'
#' @param object Seurat object with a "umap" reduction.
#' @param genes Character vector of gene names; NA entries are dropped.
#' @param batch_size Number of genes per FeaturePlot call.
#' @return The list of gene batches, invisibly. Called for its plots.
plot_gene_batches <- function(object, genes, batch_size = 10) {
  genes <- as.character(genes)
  genes <- genes[!is.na(genes)]
  # Consecutive groups of `batch_size`; a short last group is allowed, so a
  # list with fewer genes than expected never produces NA features.
  batches <- split(genes, ceiling(seq_along(genes) / batch_size))
  for (batch in batches) {
    # print() is required: plots are not auto-printed inside a loop
    print(
      FeaturePlot(
        object,
        features = batch,
        reduction = "umap",
        cols = c("lightblue", "red"), # Custom color gradient from light blue to red
        label = TRUE
      )
    )
  }
  invisible(batches)
}

# First 50 genes of each list, ten per figure (five figures per list)
plot_gene_batches(ss_vs_hc, head(top_50_up$gene, 50))
plot_gene_batches(ss_vs_hc, head(top_50_down$gene, 50))
NA
NA
NA
#' DotPlot of a gene set with the lightblue-red-firebrick colour scale
#'
#' @param object Seurat object to plot.
#' @param genes Character vector of gene names.
#' @param title Plot title.
#' @param group_by Metadata column to group cells by; NULL (the default)
#'   groups by the object's active identities.
#' @return A ggplot object (auto-printed when called at top level).
plot_gene_set_dotplot <- function(object, genes, title, group_by = NULL) {
  DotPlot(object, features = genes, group.by = group_by) +
    RotatedAxis() +
    scale_color_gradient2(low = "lightblue", mid = "red", high = "firebrick", midpoint = 1) +
    ggtitle(title) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
      axis.text.y = element_text(size = 12),
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
    )
}

# Vector of genes to plot
up_genes <- c("CLIC1", "COX5A", "GTSF1", "MAD2L1", "MYBL2", "MYL6B", "NME1",
              "PLK1", "PYCR1", "SLC25A5", "SRI", "TUBA1C", "UBE2T", "YWHAH")
up_title <- "Expression of Upregulated Genes in Sézary Syndrome"
# Upregulated genes: by active identities, by Sample_ID, by orig.ident
plot_gene_set_dotplot(ss_vs_hc, up_genes, up_title)
plot_gene_set_dotplot(ss_vs_hc, up_genes, up_title, group_by = "Sample_ID")
plot_gene_set_dotplot(ss_vs_hc, up_genes, up_title, group_by = "orig.ident")

# Downregulated genes
down_genes <- c("TXNIP", "RASA3", "RIPOR2",
                "ZFP36", "ZFP36L1", "ZFP36L2",
                "PRMT2", "MAX", "PIK3IP1",
                "BTG1", "CDKN1B")
down_title <- "Expression of Downregulated Genes in Sézary Syndrome"
# Downregulated genes: by active identities, by Sample_ID, by orig.ident
plot_gene_set_dotplot(ss_vs_hc, down_genes, down_title)
plot_gene_set_dotplot(ss_vs_hc, down_genes, down_title, group_by = "Sample_ID")
plot_gene_set_dotplot(ss_vs_hc, down_genes, down_title, group_by = "orig.ident")
NA
NA
NA
# Load required libraries
library(Seurat)
library(dplyr)
library(tibble)
# Cell counts per Sample_ID; these are the two groups compared below
table(merged_seurat$Sample_ID)
CTRL SS
4437 46658
# Work on a copy of the merged object with the layers of the RNA assay joined
combined_seu <- JoinLayers(merged_seurat, assay = "RNA")
# Use disease status as the active identity (levels: "SS", "CTRL")
Idents(combined_seu) <- "Sample_ID"
# Differential expression, SS vs CTRL, Wilcoxon test. Both thresholds are set
# to 0 so that no gene is pre-filtered by fold change or detection rate.
markers_disease <- FindMarkers(
  combined_seu,
  ident.1 = "SS",
  ident.2 = "CTRL",
  assay = "RNA",
  test.use = "wilcox",
  logfc.threshold = 0,
  min.pct = 0
)
# Save results to CSV (genes are the row names of the FindMarkers table)
write.csv(markers_disease, file = "MergedSeurat_DE_SS_vs_Healthy_Borcherding2023.csv", row.names = TRUE)
# Get log-normalized expression matrix (RNA assay).
# `layer` is the SeuratObject v5 argument; `slot` is deprecated there.
expression_data_RNA <- GetAssayData(combined_seu, assay = "RNA", layer = "data")
# Get cell names for each group (by active identity)
ss_cells <- WhichCells(combined_seu, idents = "SS")
healthy_cells <- WhichCells(combined_seu, idents = "CTRL")
#' Add per-group mean expression to a marker table
#'
#' @param markers Data frame with genes as row names.
#' @param group1_cells Cell names (columns of `expression_data`) of group 1;
#'   reported as `mean_expr_SS`.
#' @param group2_cells Cell names of group 2; reported as `mean_expr_Healthy`.
#' @param expression_data Gene x cell expression matrix.
#' @return `markers` with row names moved to a `gene` column plus
#'   `mean_expr_SS`, `mean_expr_Healthy` and `log2FC_manual`. Genes absent
#'   from `expression_data` get NA.
# NOTE(review): log2FC_manual is log2(mean + 1) of whatever scale
# `expression_data` is on; if that is already log-normalized, this is not a
# conventional log2 fold change. Confirm the intended definition.
calculate_mean_expression <- function(markers, group1_cells, group2_cells, expression_data) {
  # Row-wise mean over the given cells; drop = FALSE keeps a one-cell group
  # as a matrix
  mean_over <- function(cells) {
    rowMeans(expression_data[, cells, drop = FALSE], na.rm = TRUE)
  }
  ss_avg <- mean_over(group1_cells)
  healthy_avg <- mean_over(group2_cells)

  annotated <- rownames_to_column(markers, "gene")
  mutate(
    annotated,
    mean_expr_SS = ss_avg[gene],
    mean_expr_Healthy = healthy_avg[gene],
    log2FC_manual = log2(mean_expr_SS + 1) - log2(mean_expr_Healthy + 1)
  )
}
# Apply the function and save final result: annotate the DE table with the
# per-group means, then write it without row names (genes are in the "gene"
# column created by calculate_mean_expression)
markers_disease_with_mean <- calculate_mean_expression(markers_disease, ss_cells, healthy_cells, expression_data_RNA)
write.csv(markers_disease_with_mean, "MergedSeurat_DE_SS_vs_Healthy_with_MeanExpr_Borcherding2023.csv", row.names = FALSE)
# Record the R and package versions used for this run
sessionInfo()
R version 4.5.2 (2025-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 24.04.3 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0 LAPACK version 3.12.0
locale:
[1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C LC_TIME=fr_FR.UTF-8 LC_COLLATE=en_GB.UTF-8
[5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=en_GB.UTF-8 LC_PAPER=fr_FR.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Paris
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] stringr_1.6.0 readr_2.1.5 tidyr_1.3.1 patchwork_1.3.2 rstatix_0.7.3 ggpubr_0.6.2
[7] scRepertoire_2.6.1 ggplot2_4.0.0 tibble_3.3.0 dplyr_1.1.4 Seurat_5.3.1 SeuratObject_5.2.0
[13] sp_2.2-0
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.22 splines_4.5.2 later_1.4.4 polyclip_1.10-7
[5] fastDummies_1.7.5 lifecycle_1.0.4 vroom_1.6.6 globals_0.18.0
[9] lattice_0.22-7 MASS_7.3-65 backports_1.5.0 magrittr_2.0.4
[13] limma_3.66.0 sass_0.4.10 plotly_4.11.0 rmarkdown_2.30
[17] jquerylib_0.1.4 yaml_2.3.10 httpuv_1.6.16 otel_0.2.0
[21] sctransform_0.4.2 spam_2.11-1 spatstat.sparse_3.1-0 reticulate_1.44.0
[25] cowplot_1.2.0 pbapply_1.7-4 RColorBrewer_1.1-3 abind_1.4-8
[29] rvest_1.0.5 Rtsne_0.17 GenomicRanges_1.62.0 presto_1.0.0
[33] purrr_1.2.0 ggraph_2.2.2 BiocGenerics_0.56.0 hash_2.2.6.3
[37] tweenr_2.0.3 evmix_2.12 IRanges_2.44.0 S4Vectors_0.48.0
[41] ggrepel_0.9.6 irlba_2.3.5.1 listenv_0.10.0 spatstat.utils_3.2-0
[45] iNEXT_3.0.2 MatrixModels_0.5-4 goftest_1.2-3 RSpectra_0.16-2
[49] spatstat.random_3.4-2 fitdistrplus_1.2-4 parallelly_1.45.1 codetools_0.2-20
[53] DelayedArray_0.36.0 xml2_1.4.1 ggforce_0.5.0 tidyselect_1.2.1
[57] farver_2.1.2 viridis_0.6.5 matrixStats_1.5.0 stats4_4.5.2
[61] spatstat.explore_3.5-3 Seqinfo_1.0.0 jsonlite_2.0.0 Formula_1.2-5
[65] tidygraph_1.3.1 progressr_0.18.0 ggridges_0.5.7 ggalluvial_0.12.5
[69] survival_3.8-3 tools_4.5.2 ica_1.0-3 Rcpp_1.1.0
[73] glue_1.8.0 gridExtra_2.3 SparseArray_1.10.1 xfun_0.54
[77] MatrixGenerics_1.22.0 withr_3.0.2 fastmap_1.2.0 SparseM_1.84-2
[81] digest_0.6.38 R6_2.6.1 mime_0.13 scattermore_1.2
[85] tensor_1.5.1 dichromat_2.0-0.1 spatstat.data_3.1-9 utf8_1.2.6
[89] generics_0.1.4 data.table_1.17.8 graphlayouts_1.2.2 httr_1.4.7
[93] htmlwidgets_1.6.4 S4Arrays_1.10.0 uwot_0.2.3 pkgconfig_2.0.3
[97] gtable_0.3.6 rsconnect_1.6.0 lmtest_0.9-40 S7_0.2.0
[101] SingleCellExperiment_1.32.0 XVector_0.50.0 htmltools_0.5.8.1 carData_3.0-5
[105] dotCall64_1.2 scales_1.4.0 Biobase_2.70.0 png_0.1-8
[109] spatstat.univar_3.1-4 ggdendro_0.2.0 knitr_1.50 rstudioapi_0.17.1
[113] tzdb_0.5.0 reshape2_1.4.5 rjson_0.2.23 nlme_3.1-168
[117] zoo_1.8-14 cachem_1.1.0 KernSmooth_2.23-26 parallel_4.5.2
[121] miniUI_0.1.2 pillar_1.11.1 grid_4.5.2 vctrs_0.6.5
[125] RANN_2.6.2 promises_1.4.0 car_3.1-3 xtable_1.8-4
[129] cluster_2.1.8.1 evaluate_1.0.5 cli_3.6.5 compiler_4.5.2
[133] crayon_1.5.3 rlang_1.1.6 future.apply_1.20.0 ggsignif_0.6.4
[137] labeling_0.4.3 immApex_1.4.0 plyr_1.8.9 stringi_1.8.7
[141] viridisLite_0.4.2 deldir_2.0-4 gsl_2.1-9 lazyeval_0.2.2
[145] spatstat.geom_3.6-0 quantreg_6.1 Matrix_1.7-4 RcppHNSW_0.6.0
[149] hms_1.1.4 bit64_4.6.0-1 future_1.67.0 statmod_1.5.1
[153] shiny_1.11.1 SummarizedExperiment_1.40.0 ROCR_1.0-11 igraph_2.2.1
[157] broom_1.0.10 memoise_2.0.1 bslib_0.9.0 bit_4.6.0