# Check metadata distribution
table(all_seurat$orig.ident)
Control SS_P1 SS_P2 SS_P3 SS_P4 SS_P5 SS_P6
4437 3443 34179 3084 849 1582 3521
VlnPlot(ss_Borcherding, features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt"),
ncol = 3)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
Please use `rlang::check_installed()` instead.
VlnPlot(ss_Borcherding, features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt",
"percent.rb"),
ncol = 4, pt.size = 0.1) &
theme(plot.title = element_text(size=10))
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.Warning: The following requested variables were not found: percent.rb
FeatureScatter(ss_Borcherding,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA") +
geom_smooth(method = 'lm')
##FeatureScatter is typically used to visualize feature-feature relationships ##for anything calculated by the object, ##i.e. columns in object metadata, PC scores etc.
FeatureScatter(ss_Borcherding,
feature1 = "nCount_RNA",
feature2 = "percent.mt")+
geom_smooth(method = 'lm')
FeatureScatter(ss_Borcherding,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")+
geom_smooth(method = 'lm')
library(Seurat)
# 1. Split the Seurat object by patient_id or sample
ss_list <- SplitObject(ss_Borcherding, split.by = "orig.ident")
# 2. Normalize and find variable features for each dataset independently
ss_list <- lapply(ss_list, function(x) {
x <- NormalizeData(x, normalization.method = "LogNormalize", scale.factor = 10000)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
return(x)
})
Normalizing layer: counts.SS_P1.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Finding variable features for layer counts.SS_P1.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Normalizing layer: counts.SS_P2.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Finding variable features for layer counts.SS_P2.SeuratProject.SeuratProject.SeuratProject.SeuratProject.SeuratProject
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Normalizing layer: counts.SS_P3.SeuratProject.SeuratProject.SeuratProject.SeuratProject
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Finding variable features for layer counts.SS_P3.SeuratProject.SeuratProject.SeuratProject.SeuratProject
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Normalizing layer: counts.SS_P4.SeuratProject.SeuratProject.SeuratProject
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Finding variable features for layer counts.SS_P4.SeuratProject.SeuratProject.SeuratProject
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Normalizing layer: counts.SS_P5.SeuratProject.SeuratProject
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Finding variable features for layer counts.SS_P5.SeuratProject.SeuratProject
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Normalizing layer: counts.SS_P6.SeuratProject
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Finding variable features for layer counts.SS_P6.SeuratProject
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Normalizing layer: counts.Control
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Finding variable features for layer counts.Control
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
# 3. Select integration features and find anchors
features <- SelectIntegrationFeatures(object.list = ss_list, nfeatures = 2000)
anchors <- FindIntegrationAnchors(object.list = ss_list, anchor.features = features, reduction = "cca")
Scaling features for provided objects
| | 0 % ~calculating
|++++++++ | 14% ~01s
|+++++++++++++++ | 29% ~07s
|++++++++++++++++++++++ | 43% ~04s
|+++++++++++++++++++++++++++++ | 57% ~02s
|++++++++++++++++++++++++++++++++++++ | 71% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s
Finding all pairwise anchors
| | 0 % ~calculating
Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.Running CCA
Merging objects
Warning: The `slot` argument of `SetAssayData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.Finding neighborhoods
Finding anchors
Found 16592 anchors
Filtering anchors
Retained 656 anchors
|+++ | 5 % ~01h 41m 44s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 8842 anchors
Filtering anchors
Retained 1649 anchors
|+++++ | 10% ~54m 25s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 11820 anchors
Filtering anchors
Retained 3215 anchors
|++++++++ | 14% ~01h 10m 17s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3216 anchors
Filtering anchors
Retained 1265 anchors
|++++++++++ | 19% ~50m 49s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3356 anchors
Filtering anchors
Retained 2187 anchors
|++++++++++++ | 24% ~45m 13s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3053 anchors
Filtering anchors
Retained 2786 anchors
|+++++++++++++++ | 29% ~35m 51s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5828 anchors
Filtering anchors
Retained 1676 anchors
|+++++++++++++++++ | 33% ~29m 25s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 6544 anchors
Filtering anchors
Retained 3213 anchors
|++++++++++++++++++++ | 38% ~29m 26s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5816 anchors
Filtering anchors
Retained 3569 anchors
|++++++++++++++++++++++ | 43% ~24m 34s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 2911 anchors
Filtering anchors
Retained 2169 anchors
|++++++++++++++++++++++++ | 48% ~20m 24s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 9206 anchors
Filtering anchors
Retained 1877 anchors
|+++++++++++++++++++++++++++ | 52% ~17m 26s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 11842 anchors
Filtering anchors
Retained 4050 anchors
|+++++++++++++++++++++++++++++ | 57% ~19m 28s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 8120 anchors
Filtering anchors
Retained 3963 anchors
|+++++++++++++++++++++++++++++++ | 62% ~16m 24s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3260 anchors
Filtering anchors
Retained 1962 anchors
|++++++++++++++++++++++++++++++++++ | 67% ~13m 26s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5750 anchors
Filtering anchors
Retained 3022 anchors
|++++++++++++++++++++++++++++++++++++ | 71% ~10m 54s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 12799 anchors
Filtering anchors
Retained 1902 anchors
|+++++++++++++++++++++++++++++++++++++++ | 76% ~08m 48s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 20781 anchors
Filtering anchors
Retained 3017 anchors
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~08m 34s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 9303 anchors
Filtering anchors
Retained 1984 anchors
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~06m 11s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3377 anchors
Filtering anchors
Retained 808 anchors
|++++++++++++++++++++++++++++++++++++++++++++++ | 90% ~03m 56s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 6250 anchors
Filtering anchors
Retained 1422 anchors
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~01m 53s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 10154 anchors
Filtering anchors
Retained 2154 anchors
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=38m 33s
# 4. Integrate data
ss_integrated <- IntegrateData(anchorset = anchors)
Merging dataset 4 into 3
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULLMerging dataset 5 into 3 4
Extracting anchors for merged samples
Finding integration vectors
Warning: Different cells in new layer data than already exists for scale.dataFinding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULLMerging dataset 6 into 3 4 5
Extracting anchors for merged samples
Finding integration vectors
Warning: Different cells in new layer data than already exists for scale.dataFinding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULLMerging dataset 3 4 5 6 into 2
Extracting anchors for merged samples
Finding integration vectors
Warning: Different cells in new layer data than already exists for scale.dataFinding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULLMerging dataset 7 into 2 3 4 5 6
Extracting anchors for merged samples
Finding integration vectors
Warning: Different cells in new layer data than already exists for scale.dataFinding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULLMerging dataset 1 into 2 3 4 5 6 7
Extracting anchors for merged samples
Finding integration vectors
Warning: Different cells in new layer data than already exists for scale.dataFinding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Warning: Layer counts isn't present in the assay object; returning NULL
# 5. Proceed with downstream analysis on integrated data
DefaultAssay(ss_integrated) <- "integrated"
# Scale data
ss_integrated <- ScaleData(ss_integrated, verbose = FALSE)
ss_Borcherding <- ss_integrated
# PCA
ss_Borcherding <- RunPCA(ss_Borcherding, features = VariableFeatures(ss_Borcherding))
PC_ 1
Positive: TRBV7-2, IL32, IFITM1, CD52, B2M, CD3D, CD3E, LTB, TRAV41, PTPRCAP
RPS18, CRIP1, IL7R, IL2RG, CCR7, LAT, TCF7, LDHB, DDX5, RPL3
PPP2R5C, AIRE, SLC9A3R1, HLA-B, AQP3, EMP3, RPS6, TAGLN2, GSTK1, CD27
Negative: TYROBP, LYZ, FCN1, CST3, S100A9, AIF1, S100A8, FCER1G, LST1, SPI1
SERPINA1, VCAN, HLA-DRA, S100A12, CYBB, CTSS, LGALS2, MNDA, GSTP1, CFD
CSTA, CD14, FTL, MS4A6A, FOS, HLA-DRB1, NCF1, CD68, PSAP, FGR
PC_ 2
Positive: TMSB10, S100A6, S100A10, CD52, CRIP1, S100A4, IL32, S100A11, GAPDH, RPS18
TRBV7-2, VIM, RPS19, HLA-B, TMSB4X, ACTG1, IFITM1, CFL1, RPS6, HLA-C
PPIA, YBX1, MT-CO1, LGALS1, CORO1A, NME2, CD3D, FYB, GSTK1, IFITM2
Negative: SNCA, MS4A1, HBB, MPP1, CTD-3214H19.6, C2orf88, CA2, CD79A, STX1A, FAM212B
NRGN, CTNNAL1, RGS18, ATF3, SLC25A37, SH3BGRL2, PDGFA, FAM46C, SUSD1, ESAM
ACRBP, EPB42, HIST1H3H, MMRN1, LINC00926, IGLV2-14, CENPN, BCL2L1, NFE2, MAP3K8
PC_ 3
Positive: TRBV7-2, S100A10, JUNB, S100A6, LTB, DDX5, S100A8, S100A12, S100A9, IL7R
VCAN, TRAV41, CD52, LYZ, TSC22D3, AIRE, TCF7, EPHB6, FOS, CD14
MNDA, TMSB10, RNASET2, CCR7, CRIP1, TRBV12-4, PPP2R5C, FCN1, LGALS2, MAL
Negative: NKG7, CCL5, GZMA, GZMH, GZMB, CTSW, CCL4, CD8A, PRF1, CD8B
CLIC3, CD7, KLRD1, GZMM, CMC1, HOPX, GZMK, CST7, GNLY, MATK
KLRB1, PFN1, PLEK, C12orf75, TBX21, SPON2, HCST, TRBV7-6, KLRG1, APOBEC3G
PC_ 4
Positive: RPLP1, RPS19, SCN1B, MS4A1, RPS12, CD79A, RPS18, KLHL35, TMSB10, RPL13
RPS6, IGLV2-14, MALAT1, HLA-DQA1, RPLP0, NKG7, LINC00926, AQP10, GZMA, MT-CO1
RPL3, MT-CO2, POU2AF1, HLA-DRA, HLA-DRB1, HLA-DPB1, GZMB, GZMH, HLA-DOB, CD7
Negative: CTD-3214H19.6, NRGN, STX1A, FAM212B, ATF3, RGS18, HIST1H3H, CENPN, C2orf88, PDGFA
SH3BGRL2, CORO1C, CTNNAL1, HIST1H2AC, ESAM, SUSD1, HIST1H2BJ, MMD, TSC22D1, CLU
ACRBP, PLA2G12A, CDK2AP1, RP11-367G6.3, MMRN1, AP003068.23, MAP3K7CL, SLC40A1, CA2, MPP1
PC_ 5
Positive: MS4A1, CD79A, HLA-DQA1, LINC00926, HLA-DRA, IGLV2-14, HLA-DPB1, POU2AF1, HLA-DPA1, IGHM
HLA-DOB, HLA-DRB1, CD74, HLA-DRB5, HLA-DMA, CD79B, MEF2C, TCF4, MYBL2, HVCN1
SCIMP, CD40, HLA-DQB1, KIAA0226L, FAM26F, NT5E, FAM129C, MZB1, EAF2, RP11-126O1.6
Negative: S100A12, NKG7, VCAN, S100A8, GZMA, GZMH, S100A9, GZMB, CCL4, CCL5
CTSW, LYZ, CD14, PRF1, KLRD1, CLIC3, CD7, CTSD, CD8A, MGST1
S100A6, CSF3R, MNDA, CD8B, S100A4, FOS, TYROBP, CST7, GZMM, HOPX
# Optional: Visualize elbow plot
ElbowPlot(ss_Borcherding, ndims = 50)
# # Run JackStraw analysis properly
# ss_Borcherding <- JackStraw(ss_Borcherding, num.replicate = 100)
# ss_Borcherding <- ScoreJackStraw(ss_Borcherding, dims = 1:20)
#
# # Visualize JackStraw scores
# JackStrawPlot(ss_Borcherding, dims = 1:20)
# TEST-1
# given that the output of RunPCA is "pca"
# replace "so" by the name of your seurat object
pct <- ss_Borcherding[["pca"]]@stdev / sum(ss_Borcherding[["pca"]]@stdev) * 100
cumu <- cumsum(pct) # Calculate cumulative percents for each PC
# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which((pct[-length(pct)] - pct[-1]) > 0.1), decreasing = T)[1] + 1
# last point where change of % of variation is more than 0.1%. -> co2
co2
[1] 13
# TEST-2
# get significant PCs
stdv <- ss_Borcherding[["pca"]]@stdev
sum.stdv <- sum(ss_Borcherding[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co2 <- sort(which((percent.stdv[1:length(percent.stdv) - 1] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = T)[1] + 1
min.pc <- min(co1, co2)
min.pc
[1] 13
# Create a dataframe with values
plot_df <- data.frame(pct = percent.stdv,
cumu = cumulative,
rank = 1:length(percent.stdv))
# Elbow plot to visualize
ggplot(plot_df, aes(cumulative, percent.stdv, label = rank, color = rank > min.pc)) +
geom_text() +
geom_vline(xintercept = 90, color = "grey") +
geom_hline(yintercept = min(percent.stdv[percent.stdv > 5]), color = "grey") +
theme_bw()
NA
NA
NA
# Then find neighbors & clusters
ss_Borcherding <- FindNeighbors(ss_Borcherding, dims = 1:40)
Computing nearest neighbor graph
Computing SNN
ss_Borcherding <- FindClusters(ss_Borcherding, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 51095
Number of edges: 1483726
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8983
Number of communities: 33
Elapsed time: 8 seconds
12 singletons identified. 21 final clusters.
# run UMAP
ss_Borcherding <- RunUMAP(ss_Borcherding, dims = 1:40)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session17:00:51 UMAP embedding parameters a = 0.9922 b = 1.112
17:00:51 Read 51095 rows and found 40 numeric columns
17:00:51 Using Annoy for neighbor search, n_neighbors = 30
17:00:51 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:00:56 Writing NN index file to temp file /tmp/RtmpgtON3t/file158c73aa3390f
17:00:56 Searching Annoy index using 1 thread, search_k = 3000
17:01:13 Annoy recall = 100%
17:01:14 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
17:01:16 Initializing from normalized Laplacian + noise (using RSpectra)
17:01:17 Commencing optimization for 200 epochs, with 2375354 positive edges
17:01:17 Using rng type: pcg
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
17:01:36 Optimization finished
ss_Borcherding <-RunTSNE(ss_Borcherding, dims.use = 1:40)
DimPlot(ss_Borcherding, reduction = "umap",group.by = "orig.ident", label = TRUE,repel = T, pt.size = 0.6) +
ggtitle("UMAP of Sézary Syndrome CD4+ T Cells")
DimPlot(ss_Borcherding, reduction = "umap", label = TRUE,repel = T, pt.size = 0.6) +
ggtitle("UMAP of Sézary Syndrome CD4+ T Cells")
DimPlot(ss_Borcherding, reduction = "tsne", label = TRUE, pt.size = 0.6) +
ggtitle("TSNE of Sézary Syndrome CD4+ T Cells")
top_50_up <- read.csv("../Data_SS_Borcherding_2019/top_50_upregulated.csv") # or read.delim("top_50_up.tsv")
top_50_down <- read.csv("../Data_SS_Borcherding_2019/top_50_downregulated.csv")
Idents(ss_Borcherding) <- "seurat_clusters"
FeaturePlot(ss_Borcherding,
features = top_50_up$gene[1:10],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: Could not find CDT1 in the default search locations, found in 'RNA' assay insteadWarning: Removing 849 cells missing data for features requestedWarning: Could not find CRNDE in the default search locations, found in 'RNA' assay insteadWarning: Removing 5515 cells missing data for features requestedWarning: Could not find CDCA8 in the default search locations, found in 'RNA' assay insteadWarning: Removing 2431 cells missing data for features requestedWarning: The following requested variables were not found: PCLAF
FeaturePlot(ss_Borcherding,
features = top_50_up$gene[11:20],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: Could not find PKMYT1 in the default search locations, found in 'RNA' assay insteadWarning: Removing 5874 cells missing data for features requestedWarning: Could not find AHCY in the default search locations, found in 'RNA' assay insteadWarning: Could not find TTF2 in the default search locations, found in 'RNA' assay insteadWarning: Could not find COX5A in the default search locations, found in 'RNA' assay instead
FeaturePlot(ss_Borcherding,
features = top_50_up$gene[21:30],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: Could not find FH in the default search locations, found in 'RNA' assay insteadWarning: Could not find UCK2 in the default search locations, found in 'RNA' assay insteadWarning: Could not find CCNB1 in the default search locations, found in 'RNA' assay insteadWarning: Removing 1582 cells missing data for features requestedWarning: Could not find PYCR1 in the default search locations, found in 'RNA' assay insteadWarning: Removing 1582 cells missing data for features requestedWarning: Could not find TRIP13 in the default search locations, found in 'RNA' assay insteadWarning: Removing 849 cells missing data for features requested
FeaturePlot(ss_Borcherding,
features = top_50_up$gene[31:40],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: Could not find RNASEH2A in the default search locations, found in 'RNA' assay insteadWarning: Could not find NME1 in the default search locations, found in 'RNA' assay insteadWarning: Could not find NABP2 in the default search locations, found in 'RNA' assay insteadWarning: Could not find CDC20 in the default search locations, found in 'RNA' assay insteadWarning: Removing 5874 cells missing data for features requested
FeaturePlot(ss_Borcherding,
features = top_50_up$gene[41:50],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: Could not find UBE2C in the default search locations, found in 'RNA' assay insteadWarning: Removing 849 cells missing data for features requestedWarning: Could not find EBP in the default search locations, found in 'RNA' assay insteadWarning: Could not find SLC29A1 in the default search locations, found in 'RNA' assay insteadWarning: Could not find SRI in the default search locations, found in 'RNA' assay insteadWarning: Could not find PLK1 in the default search locations, found in 'RNA' assay insteadWarning: Removing 849 cells missing data for features requested
FeaturePlot(ss_Borcherding,
features = top_50_down$gene[1:10],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: Could not find SARAF in the default search locations, found in 'RNA' assay insteadWarning: Could not find BTG1 in the default search locations, found in 'RNA' assay insteadWarning: Could not find SRSF5 in the default search locations, found in 'RNA' assay insteadWarning: Could not find IKZF1 in the default search locations, found in 'RNA' assay insteadWarning: Could not find PRMT2 in the default search locations, found in 'RNA' assay insteadWarning: Could not find RBL2 in the default search locations, found in 'RNA' assay insteadWarning: Could not find PACS1 in the default search locations, found in 'RNA' assay insteadWarning: The following requested variables were not found: PCED1B-AS1, SNHG5
FeaturePlot(ss_Borcherding,
features = top_50_down$gene[11:20],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: Could not find ZBTB20 in the default search locations, found in 'RNA' assay insteadWarning: Could not find RAPGEF6 in the default search locations, found in 'RNA' assay insteadWarning: Could not find N4BP2L2 in the default search locations, found in 'RNA' assay insteadWarning: Could not find SF1 in the default search locations, found in 'RNA' assay insteadWarning: Could not find RPS27 in the default search locations, found in 'RNA' assay insteadWarning: Could not find RSRP1 in the default search locations, found in 'RNA' assay insteadWarning: Could not find DAZAP2 in the default search locations, found in 'RNA' assay insteadWarning: The following requested variables were not found: RIPOR2
FeaturePlot(ss_Borcherding,
features = top_50_down$gene[21:30],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: Could not find HLA-E in the default search locations, found in 'RNA' assay insteadWarning: Could not find SON in the default search locations, found in 'RNA' assay insteadWarning: Could not find DDX6 in the default search locations, found in 'RNA' assay insteadWarning: Could not find RASA3 in the default search locations, found in 'RNA' assay insteadWarning: Could not find VAMP2 in the default search locations, found in 'RNA' assay insteadWarning: Could not find PNRC1 in the default search locations, found in 'RNA' assay insteadWarning: Could not find CDKN1B in the default search locations, found in 'RNA' assay instead
FeaturePlot(ss_Borcherding,
features = top_50_down$gene[31:40],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: Could not find LEPROTL1 in the default search locations, found in 'RNA' assay insteadWarning: Could not find EPC1 in the default search locations, found in 'RNA' assay insteadWarning: Could not find FOXP1 in the default search locations, found in 'RNA' assay insteadWarning: Could not find CIRBP in the default search locations, found in 'RNA' assay insteadWarning: Could not find SUN2 in the default search locations, found in 'RNA' assay insteadWarning: The following requested variables were not found: LINC01578
FeaturePlot(ss_Borcherding,
features = top_50_down$gene[41:50],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: Could not find HIF1A in the default search locations, found in 'RNA' assay insteadWarning: Could not find PNISR in the default search locations, found in 'RNA' assay insteadWarning: Could not find R3HDM4 in the default search locations, found in 'RNA' assay insteadWarning: Could not find ANKRD44 in the default search locations, found in 'RNA' assay insteadWarning: Could not find DDX24 in the default search locations, found in 'RNA' assay insteadWarning: The following requested variables were not found: AL138963.4
DimPlot(ss_Borcherding, group.by = "seurat_clusters", label = T, label.box = T, repel = T, reduction = "umap")
NA
NA
DefaultAssay(ss_Borcherding) <- "RNA"
Idents(ss_Borcherding) <- "Disease_state"
# Vector of genes to plot
up_genes <- c("CLIC1", "COX5A","GTSF1", "MAD2L1","MYBL2","MYL6B","NME1","PLK1", "PYCR1", "SLC25A5", "SRI", "TUBA1C", "UBE2T", "YWHAH")
# DotPlot with custom firebrick-red gradient
DotPlot(ss_Borcherding, features = up_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightyellow", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Warning: Scaling data with a low number of groups may produce misleading resultsScale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Idents(ss_Borcherding) <- "orig.ident"
# Vector of genes to plot
up_genes <- c("CLIC1", "COX5A","GTSF1", "MAD2L1","MYBL2","MYL6B","NME1","PLK1", "PYCR1", "SLC25A5", "SRI", "TUBA1C", "UBE2T", "YWHAH")
# DotPlot with custom firebrick-red gradient
DotPlot(ss_Borcherding, features = up_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightyellow", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Idents(ss_Borcherding) <- "seurat_clusters"
# Vector of genes to plot
up_genes <- c("CLIC1", "COX5A","GTSF1", "MAD2L1","MYBL2","MYL6B","NME1","PLK1", "PYCR1", "SLC25A5", "SRI", "TUBA1C", "UBE2T", "YWHAH")
# DotPlot with custom firebrick-red gradient
DotPlot(ss_Borcherding, features = up_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightyellow", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Idents(ss_Borcherding) <- "Disease_state"
# Downregulated genes
down_genes <- c("TXNIP", "RASA3", "RIPOR2",
"ZFP36", "ZFP36L1", "ZFP36L2",
"PRMT2", "MAX", "PIK3IP1",
"BTG1", "CDKN1B")
# DotPlot with firebrick color for high expression
DotPlot(ss_Borcherding, features = down_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightyellow", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Warning: The following requested variables were not found: RIPOR2Warning: Scaling data with a low number of groups may produce misleading resultsScale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Idents(ss_Borcherding) <- "orig.ident"
# Downregulated genes
down_genes <- c("TXNIP", "RASA3", "RIPOR2",
"ZFP36", "ZFP36L1", "ZFP36L2",
"PRMT2", "MAX", "PIK3IP1",
"BTG1", "CDKN1B")
# DotPlot with firebrick color for high expression
DotPlot(ss_Borcherding, features = down_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightyellow", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Warning: The following requested variables were not found: RIPOR2Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Idents(ss_Borcherding) <- "seurat_clusters"
# DotPlot with firebrick color for high expression
DotPlot(ss_Borcherding, features = down_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightyellow", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Warning: The following requested variables were not found: RIPOR2Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
saveRDS(ss_Borcherding, file = "data/ss_Borcherding_Malignant_6_Normal_1_Integrated_object.rds")
# Load required libraries
library(Seurat)
library(dplyr)
library(tibble)
combined_seu <- ss_Borcherding
# Join the layers of the RNA assay
combined_seu <- JoinLayers(combined_seu, assay = "RNA")
# Ensure your identity class is set to disease status
Idents(combined_seu) <- "Disease_state" # e.g., levels: "SS", "Control"
# Run differential expression between SS vs Control
markers_disease <- FindMarkers(
object = combined_seu,
ident.1 = "Malignant",
ident.2 = "Healthy",
assay = "RNA",
logfc.threshold = 0,
min.pct = 0,
test.use = "wilcox" # or "MAST" if RNA assay supports it
)
# Save results to CSV
write.csv(markers_disease, file = "DE_SS_vs_Healthy.csv", row.names = TRUE)
# Get log-normalized expression matrix (RNA assay)
expression_data_RNA <- GetAssayData(combined_seu, assay = "RNA", slot = "data")
# Get cell names for each group
ss_cells <- WhichCells(combined_seu, idents = "Malignant")
healthy_cells <- WhichCells(combined_seu, idents = "Healthy")
# Function to add mean expression per group
calculate_mean_expression <- function(markers, group1_cells, group2_cells, expression_data) {
group1_mean <- rowMeans(expression_data[, group1_cells, drop = FALSE], na.rm = TRUE)
group2_mean <- rowMeans(expression_data[, group2_cells, drop = FALSE], na.rm = TRUE)
markers <- markers %>%
rownames_to_column("gene") %>%
mutate(
mean_expr_SS = group1_mean[gene],
mean_expr_Healthy = group2_mean[gene],
log2FC_manual = log2(mean_expr_SS + 1) - log2(mean_expr_Healthy + 1)
)
return(markers)
}
# Apply the function and save final result
markers_disease_with_mean <- calculate_mean_expression(markers_disease, ss_cells, healthy_cells, expression_data_RNA)
write.csv(markers_disease_with_mean, "Borcherding_integrated_object_DE/DE_SS_vs_Healthy_with_MeanExpr_Borcherding2023.csv", row.names = FALSE)