library(Seurat)
# Load 10x-format data
data_dir <- "data/GSM3478791/"
ss.data <- Read10X(data.dir = data_dir)
# Create Seurat object
ss_Borcherding <- CreateSeuratObject(counts = ss.data, project = "SS_Patient1", min.cells = 3, min.features = 200)
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
# Basic QC and filtering
ss_Borcherding[["percent.mt"]] <- PercentageFeatureSet(ss_Borcherding, pattern = "^MT-")
ss_Borcherding <- subset(ss_Borcherding, subset = nFeature_RNA > 200 & nFeature_RNA < 3500 & percent.mt < 9)
VlnPlot(ss_Borcherding, features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt"),
ncol = 3)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
Please use `rlang::check_installed()` instead.
VlnPlot(ss_Borcherding, features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt",
"percent.rb"),
ncol = 4, pt.size = 0.1) &
theme(plot.title = element_text(size=10))
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.Warning: The following requested variables were not found: percent.rb
FeatureScatter(ss_Borcherding,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA") +
geom_smooth(method = 'lm')
##FeatureScatter is typically used to visualize feature-feature relationships ##for anything calculated by the object, ##i.e. columns in object metadata, PC scores etc.
FeatureScatter(ss_Borcherding,
feature1 = "nCount_RNA",
feature2 = "percent.mt")+
geom_smooth(method = 'lm')
FeatureScatter(ss_Borcherding,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")+
geom_smooth(method = 'lm')
ss_Borcherding <- NormalizeData(ss_Borcherding, normalization.method = "LogNormalize", scale.factor = 10000)
Normalizing layer: counts
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
ss_Borcherding <- FindVariableFeatures(ss_Borcherding, selection.method = "vst", nfeatures = 2000)
Finding variable features for layer counts
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
ss_Borcherding <- ScaleData(ss_Borcherding, features = VariableFeatures(ss_Borcherding))
Centering and scaling data matrix
|
| | 0%
|
|======================================================== | 50%
|
|================================================================================================================| 100%
# Optional: Visualize elbow plot
ElbowPlot(ss_Borcherding, ndims = 20)
# Run JackStraw analysis properly
ss_Borcherding <- JackStraw(ss_Borcherding, num.replicate = 100)
| | 0 % ~calculating
|+ | 1 % ~01m 46s
|+ | 2 % ~01m 47s
|++ | 3 % ~01m 46s
|++ | 4 % ~01m 45s
|+++ | 5 % ~01m 45s
|+++ | 6 % ~01m 44s
|++++ | 7 % ~01m 42s
|++++ | 8 % ~01m 41s
|+++++ | 9 % ~01m 40s
|+++++ | 10% ~01m 39s
|++++++ | 11% ~01m 38s
|++++++ | 12% ~01m 36s
|+++++++ | 13% ~01m 35s
|+++++++ | 14% ~01m 34s
|++++++++ | 15% ~01m 33s
|++++++++ | 16% ~01m 32s
|+++++++++ | 17% ~01m 31s
|+++++++++ | 18% ~01m 31s
|++++++++++ | 19% ~01m 29s
|++++++++++ | 20% ~01m 28s
|+++++++++++ | 21% ~01m 27s
|+++++++++++ | 22% ~01m 26s
|++++++++++++ | 23% ~01m 25s
|++++++++++++ | 24% ~01m 24s
|+++++++++++++ | 25% ~01m 23s
|+++++++++++++ | 26% ~01m 24s
|++++++++++++++ | 27% ~01m 22s
|++++++++++++++ | 28% ~01m 21s
|+++++++++++++++ | 29% ~01m 20s
|+++++++++++++++ | 30% ~01m 19s
|++++++++++++++++ | 31% ~01m 18s
|++++++++++++++++ | 32% ~01m 16s
|+++++++++++++++++ | 33% ~01m 15s
|+++++++++++++++++ | 34% ~01m 14s
|++++++++++++++++++ | 35% ~01m 13s
|++++++++++++++++++ | 36% ~01m 12s
|+++++++++++++++++++ | 37% ~01m 11s
|+++++++++++++++++++ | 38% ~01m 10s
|++++++++++++++++++++ | 39% ~01m 08s
|++++++++++++++++++++ | 40% ~01m 07s
|+++++++++++++++++++++ | 41% ~01m 06s
|+++++++++++++++++++++ | 42% ~01m 05s
|++++++++++++++++++++++ | 43% ~01m 04s
|++++++++++++++++++++++ | 44% ~01m 03s
|+++++++++++++++++++++++ | 45% ~01m 01s
|+++++++++++++++++++++++ | 46% ~01m 00s
|++++++++++++++++++++++++ | 47% ~59s
|++++++++++++++++++++++++ | 48% ~58s
|+++++++++++++++++++++++++ | 49% ~57s
|+++++++++++++++++++++++++ | 50% ~56s
|++++++++++++++++++++++++++ | 51% ~55s
|++++++++++++++++++++++++++ | 52% ~54s
|+++++++++++++++++++++++++++ | 53% ~52s
|+++++++++++++++++++++++++++ | 54% ~51s
|++++++++++++++++++++++++++++ | 55% ~50s
|++++++++++++++++++++++++++++ | 56% ~49s
|+++++++++++++++++++++++++++++ | 57% ~48s
|+++++++++++++++++++++++++++++ | 58% ~47s
|++++++++++++++++++++++++++++++ | 59% ~46s
|++++++++++++++++++++++++++++++ | 60% ~45s
|+++++++++++++++++++++++++++++++ | 61% ~44s
|+++++++++++++++++++++++++++++++ | 62% ~42s
|++++++++++++++++++++++++++++++++ | 63% ~41s
|++++++++++++++++++++++++++++++++ | 64% ~40s
|+++++++++++++++++++++++++++++++++ | 65% ~39s
|+++++++++++++++++++++++++++++++++ | 66% ~38s
|++++++++++++++++++++++++++++++++++ | 67% ~37s
|++++++++++++++++++++++++++++++++++ | 68% ~36s
|+++++++++++++++++++++++++++++++++++ | 69% ~35s
|+++++++++++++++++++++++++++++++++++ | 70% ~34s
|++++++++++++++++++++++++++++++++++++ | 71% ~33s
|++++++++++++++++++++++++++++++++++++ | 72% ~31s
|+++++++++++++++++++++++++++++++++++++ | 73% ~30s
|+++++++++++++++++++++++++++++++++++++ | 74% ~29s
|++++++++++++++++++++++++++++++++++++++ | 75% ~28s
|++++++++++++++++++++++++++++++++++++++ | 76% ~27s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~26s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~25s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~24s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~22s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~21s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~20s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~19s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~18s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~17s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~16s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~15s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~13s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~12s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~11s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~10s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~09s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~08s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~07s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~06s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~04s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~03s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=01m 52s
| | 0 % ~calculating
|+++ | 5 % ~00s
|+++++ | 10% ~00s
|++++++++ | 15% ~00s
|++++++++++ | 20% ~00s
|+++++++++++++ | 25% ~00s
|+++++++++++++++ | 30% ~00s
|++++++++++++++++++ | 35% ~00s
|++++++++++++++++++++ | 40% ~00s
|+++++++++++++++++++++++ | 45% ~00s
|+++++++++++++++++++++++++ | 50% ~00s
|++++++++++++++++++++++++++++ | 55% ~00s
|++++++++++++++++++++++++++++++ | 60% ~00s
|+++++++++++++++++++++++++++++++++ | 65% ~00s
|+++++++++++++++++++++++++++++++++++ | 70% ~00s
|++++++++++++++++++++++++++++++++++++++ | 75% ~00s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=00s
ss_Borcherding <- ScoreJackStraw(ss_Borcherding, dims = 1:20)
# Visualize JackStraw scores
JackStrawPlot(ss_Borcherding, dims = 1:20)
# TEST-1
# given that the output of RunPCA is "pca"
# replace "so" by the name of your seurat object
pct <- ss_Borcherding[["pca"]]@stdev / sum(ss_Borcherding[["pca"]]@stdev) * 100
cumu <- cumsum(pct) # Calculate cumulative percents for each PC
# Determine the difference between variation of PC and subsequent PC
co2 <- sort(which((pct[-length(pct)] - pct[-1]) > 0.1), decreasing = T)[1] + 1
# last point where change of % of variation is more than 0.1%. -> co2
co2
[1] 14
# TEST-2
# get significant PCs
stdv <- ss_Borcherding[["pca"]]@stdev
sum.stdv <- sum(ss_Borcherding[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co2 <- sort(which((percent.stdv[1:length(percent.stdv) - 1] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = T)[1] + 1
min.pc <- min(co1, co2)
min.pc
[1] 14
# Create a dataframe with values
plot_df <- data.frame(pct = percent.stdv,
cumu = cumulative,
rank = 1:length(percent.stdv))
# Elbow plot to visualize
ggplot(plot_df, aes(cumulative, percent.stdv, label = rank, color = rank > min.pc)) +
geom_text() +
geom_vline(xintercept = 90, color = "grey") +
geom_hline(yintercept = min(percent.stdv[percent.stdv > 5]), color = "grey") +
theme_bw()
NA
NA
NA
ss_Borcherding <- FindNeighbors(ss_Borcherding, dims = 1:10)
Computing nearest neighbor graph
Computing SNN
ss_Borcherding <- FindClusters(ss_Borcherding, resolution = 1.2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 3443
Number of edges: 109727
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.7143
Number of communities: 13
Elapsed time: 0 seconds
DimPlot(ss_Borcherding, reduction = "umap", label = TRUE, pt.size = 0.6) +
ggtitle("UMAP of Sézary Syndrome CD4+ T Cells")
top_50_up <- read.csv("top_50_upregulated.csv") # or read.delim("top_50_up.tsv")
top_50_down <- read.csv("top_50_downregulated.csv")
FeaturePlot(ss_Borcherding,
features = top_50_up$gene[1:10],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: PCLAF
FeaturePlot(ss_Borcherding,
features = top_50_up$gene[11:20],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: PKMYT1
FeaturePlot(ss_Borcherding,
features = top_50_up$gene[21:30],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_Borcherding,
features = top_50_up$gene[31:40],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: CDC20
FeaturePlot(ss_Borcherding,
features = top_50_up$gene[41:50],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_Borcherding,
features = top_50_down$gene[1:10],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: PCED1B-AS1, SNHG5
FeaturePlot(ss_Borcherding,
features = top_50_down$gene[11:20],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: RIPOR2
FeaturePlot(ss_Borcherding,
features = top_50_down$gene[21:30],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
FeaturePlot(ss_Borcherding,
features = top_50_down$gene[31:40],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: LINC01578
FeaturePlot(ss_Borcherding,
features = top_50_down$gene[41:50],
reduction = "umap",
cols = c("lightblue", "red"), # Custom color gradient from light blue to red
label = TRUE)
Warning: The following requested variables were not found: AL138963.4
DimPlot(ss_Borcherding, group.by = "seurat_clusters", label = T, label.box = T, repel = T, reduction = "umap")
NA
NA
# Vector of genes to plot
up_genes <- c("CLIC1", "COX5A","GTSF1", "MAD2L1","MYBL2","MYL6B","NME1","PLK1", "PYCR1", "SLC25A5", "SRI", "TUBA1C", "UBE2T", "YWHAH")
# DotPlot with custom firebrick-red gradient
DotPlot(ss_Borcherding, features = up_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightyellow", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Upregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
# Downregulated genes
down_genes <- c("TXNIP", "RASA3", "RIPOR2",
"ZFP36", "ZFP36L1", "ZFP36L2",
"PRMT2", "MAX", "PIK3IP1",
"BTG1", "CDKN1B")
# DotPlot with firebrick color for high expression
DotPlot(ss_Borcherding, features = down_genes) +
RotatedAxis() +
scale_color_gradient2(low = "lightyellow", mid = "red", high = "firebrick", midpoint = 1) +
ggtitle("Expression of Downregulated Genes in Sézary Syndrome") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
)
Warning: The following requested variables were not found: RIPOR2Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
saveRDS(ss_Borcherding, file = "data/ss_Borcherding.rds")