All_samples_Merged <- readRDS("../0-Seurat_RDS_OBJECT_FINAL/All_samples_Merged_with_STCAT_and_renamed_FINAL.rds")
##. Define some color palette
# Define some color palette
pal <- c(scales::hue_pal()(8), RColorBrewer::brewer.pal(9, "Set1"), RColorBrewer::brewer.pal(8, "Set2"))
set.seed(1)
pal <- rep(sample(pal, length(pal)), 200)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9223676 492.6 16857814 900.4 12210725 652.2
Vcells 1232790134 9405.5 1530466652 11676.6 1249962156 9536.5
vars <- c("Patient_origin", "orig.ident", "seurat_clusters", "Phase")
pl <- list()
for (i in vars) {
pl[[i]] <- DimPlot(obj, group.by = i, label = T) + theme_void() + NoLegend()
}
wrap_plots(pl)
table(obj$seurat_clusters)
0 1 2 3 4 5 6 7 8 9 10 11 12 13
6789 5275 4663 4661 4086 3634 3536 3409 3338 3273 3212 1675 1063 691
vars <- c("TOX", "LAG3", "CTLA4", "TIGIT", "FOXP3", "IL2RA", "CTLA4", "IKZF2", "TIGIT", "CCR7", "SELL", "IL7R", "TCF7", "CCR7", "SELL", "IL7R", "TCF7", "LEF1", "GZMB", "PRF1", "IFNG", "KLRG1", "CD69", "CXCR6", "PRF1", "GZMB", "NKG7", "GNLY", "ISG15", "IFI6", "IFIT3", "MX1", "MKI67", "TOP2A", "STAT3", "AHR", "CCR6", "BATF", "PTPRC", "GZMB", "BCL6", "ICOS", "HSPA1A", "ATF3", "IL2RA", "CD69", "HLA-DRA", "CD34")
pl <- list()
pl <- list(DimPlot(obj, group.by = "seurat_clusters", label = T) + theme_void() + NoLegend())
for (i in vars) {
pl[[i]] <- FeaturePlot(obj, features = i, order = T) + theme_void() + NoLegend()
}
Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.
wrap_plots(pl)
NA
NA
# Load necessary libraries
library(Seurat)
library(patchwork)
library(glue)
Attaching package: ‘glue’
The following object is masked from ‘package:SummarizedExperiment’:
trim
The following object is masked from ‘package:GenomicRanges’:
trim
The following object is masked from ‘package:IRanges’:
trim
# Define marker groups
marker_groups <- list(
"CD4 Tex (Exhausted)" = c("TOX", "LAG3", "CTLA4", "TIGIT"),
"CD4 Treg" = c("FOXP3", "IL2RA", "CTLA4", "IKZF2", "TIGIT"),
"CD4 Tcm (Central Memory)" = c("CCR7", "SELL", "IL7R", "TCF7"),
"CD4 Tn (Naive)" = c("CCR7", "SELL", "IL7R", "TCF7", "LEF1"),
"CD4 Tem (Effector Memory)" = c("GZMB", "PRF1", "IFNG", "KLRG1"),
"CD4 Trm (Tissue Resident)" = c("CD69", "CXCR6"),
"CD4 Tc (Cytotoxic)" = c("PRF1", "GZMB", "NKG7", "GNLY"),
"CD4 Tisg (IFN Signature)" = c("ISG15", "IFI6", "IFIT3", "MX1"),
"CD4 Proliferation" = c("MKI67", "TOP2A"),
"CD4 Th17" = c("STAT3", "AHR", "CCR6", "BATF"),
"CD4 Temra (Effector Memory RA+)" = c("PTPRC", "GZMB"),
"CD4 Tfh (Follicular Helper)" = c("BCL6", "ICOS"),
"CD4 Tstr (Stress)" = c("HSPA1A", "ATF3"),
"CD4 Activated" = c("IL2RA", "CD69", "HLA-DRA")
)
# OPTIONAL: Uncomment this to save all plots in a single PDF
# pdf("CD4_MarkerGroups_FeaturePlots.pdf", width = 12, height = 10)
# Main plotting loop
for (group_name in names(marker_groups)) {
genes <- unique(marker_groups[[group_name]])
message(glue("Plotting: {group_name}"))
# Cleaner titles and better theme
pl <- lapply(genes, function(gene) {
FeaturePlot(obj, features = gene, order = TRUE) +
ggtitle(gene) + # shorter title
theme_minimal(base_size = 10) + # use minimal for spacing
NoLegend()
})
# Combine plots
combined_plot <- wrap_plots(pl, ncol = 2) + # fewer columns = more space
plot_annotation(title = group_name)
print(combined_plot)
}
Plotting: CD4 Tex (Exhausted)
# OPTIONAL: Uncomment if using pdf()
# dev.off()
# Required markers
progenitor_markers <- c("CD34", "KIT", "GATA2", "MKI67", "PROM1", "FLT3")
# Feature plots for each marker
FeaturePlot(obj, features = progenitor_markers, order = TRUE, ncol = 3)
Warning: Could not find PROM1 in the default search locations, found in ‘RNA’ assay instead
# # Define lineage ends
# ENDS <- c("1", "8", "0" , "13")
set.seed(1)
lineages <- as.SlingshotDataSet(getLineages(
data = obj@reductions$umap@cell.embeddings,
clusterLabels = obj$seurat_clusters,
dist.method = "mnn", # It can be: "simple", "scaled.full", "scaled.diag", "slingshot" or "mnn"
)) # define where to START the trajectories
# IF NEEDED, ONE CAN ALSO MANULALLY EDIT THE LINEAGES, FOR EXAMPLE:
# sel <- sapply( lineages@lineages, function(x){rev(x)[1]} ) %in% ENDS
# lineages@lineages <- lineages@lineages[ sel ]
# names(lineages@lineages) <- paste0("Lineage",1:length(lineages@lineages))
# lineages
# Change the reduction to our "fixed" UMAP2d (FOR VISUALISATION ONLY)
lineages@reducedDim <- obj@reductions$umap@cell.embeddings
{
plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16)
lines(lineages, lwd = 1, col = "black", cex = 2)
text(centroids2d, labels = rownames(centroids2d), cex = 0.8, font = 2, col = "white")
}
# Define curves
curves <- as.SlingshotDataSet(getCurves(
data = lineages,
thresh = 1e-1,
stretch = 1e-1,
allow.breaks = F,
approx_points = 1000
))
curves
class: SlingshotDataSet
lineages: 4
Lineage1: 5 3 10 1 9 4 0 7 11
Lineage2: 5 3 10 1 9 4 0 12
Lineage3: 5 3 10 1 9 2 6 13
Lineage4: 5 3 10 1 9 2 6 8
curves: 4
Curve1: Length: 24.111 Samples: 32790.38
Curve2: Length: 23.614 Samples: 33116.73
Curve3: Length: 29.842 Samples: 31122.23
Curve4: Length: 19.677 Samples: 31168.18
# Plots
{
plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], pch = 16)
lines(curves, lwd = 2, col = "black")
text(centroids2d, labels = levels(obj$seurat_clusters), cex = 1, font = 2)
}
pseudotime <- slingPseudotime(curves, na = FALSE)
cellWeights <- slingCurveWeights(curves)
x <- rowMeans(pseudotime)
x <- x / max(x)
o <- order(x)
{
plot(obj@reductions$umap@cell.embeddings[o, ],
main = paste0("pseudotime"), pch = 16, cex = 0.4, axes = F, xlab = "", ylab = "",
col = colorRampPalette(c("grey70", "orange3", "firebrick", "purple4"))(99)[x[o] * 98 + 1]
)
points(centroids2d, cex = 2.5, pch = 16, col = "#FFFFFF99")
text(centroids2d, labels = levels(obj$seurat_clusters), cex = 1, font = 2)
}
sel_cells <- split(colnames(obj@assays$SCT@data), obj$seurat_clusters)
sel_cells <- unlist(lapply(sel_cells, function(x) {
set.seed(1)
return(sample(x, 20))
}))
# Filter genes: expressed in at least 10 cells with count > 1
expr_filter <- rowSums(obj@assays$SCT@counts[, sel_cells] > 1) >= 10
filtered_genes <- rownames(obj)[expr_filter]
# Compute gene variance on filtered genes using raw counts
gv <- as.data.frame(na.omit(scran::modelGeneVar(obj@assays$SCT@data[, sel_cells])))
Warning: collapsing to unique 'x' values
# Order by biological variance decreasing
gv <- gv[order(gv$bio, decreasing = T), ]
# Select top 500 highly variable genes
sel_genes <- sort(rownames(gv)[1:500])
path_file <- "/home/bioinfo/data/trajectory/trajectory_seurat_filtered.rds"
# fetch_data is defined at the top of this document
sceGAM <- fitGAM(
counts = drop0(obj@assays$SCT@data[sel_genes, sel_cells]),
pseudotime = pseudotime[sel_cells, ],
cellWeights = cellWeights[sel_cells, ],
nknots = 5, verbose = TRUE, parallel = TRUE, sce = TRUE,
BPPARAM = BiocParallel::MulticoreParam()
)
Warning: Impossible to place a knot at all endpoints.Increase the number of knots to avoid this issue.
|
| | 0%
|
|=== | 3%
|
|====== | 5%
|
|========= | 8%
|
|============ | 10%
|
|=============== | 13%
|
|================== | 16%
|
|===================== | 18%
|
|======================== | 21%
|
|=========================== | 23%
|
|============================== | 26%
|
|================================= | 29%
|
|==================================== | 31%
|
|======================================= | 34%
|
|========================================= | 36%
|
|============================================ | 39%
|
|=============================================== | 42%
|
|================================================== | 44%
|
|===================================================== | 47%
|
|======================================================== | 49%
|
|=========================================================== | 52%
|
|============================================================== | 55%
|
|================================================================= | 57%
|
|==================================================================== | 60%
|
|======================================================================= | 62%
|
|========================================================================== | 65%
|
|============================================================================= | 68%
|
|================================================================================ | 70%
|
|=================================================================================== | 73%
|
|====================================================================================== | 75%
|
|========================================================================================= | 78%
|
|============================================================================================ | 81%
|
|=============================================================================================== | 83%
|
|================================================================================================== | 86%
|
|===================================================================================================== | 88%
|
|======================================================================================================== | 91%
|
|========================================================================================================= | 92%
|
|============================================================================================================ | 95%
|
|=============================================================================================================== | 97%
|
|==================================================================================================================| 100%
plotGeneCount(curves, clusters = obj$seurat_clusters, models = sceGAM)
lineages
class: SlingshotDataSet
lineages: 4
Lineage1: 5 3 10 1 9 4 0 7 11
Lineage2: 5 3 10 1 9 4 0 12
Lineage3: 5 3 10 1 9 2 6 13
Lineage4: 5 3 10 1 9 2 6 8
curves: 0
lc <- sapply(lineages@lineages, function(x) {
rev(x)[1]
})
names(lc) <- gsub("Lineage", "L", names(lc))
lc.idx = match(lc, levels(obj$seurat_clusters))
{
plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], pch = 16)
lines(curves, lwd = 2, col = "black")
points(centroids2d[lc.idx, ], col = "black", pch = 16, cex = 4)
text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white")
}
NA
NA
NA
NA
set.seed(8)
res <- na.omit(associationTest(sceGAM, contrastType = "consecutive"))
res <- res[res$pvalue < 1e-3, ]
res <- res[res$waldStat > mean(res$waldStat), ]
res <- res[order(res$waldStat, decreasing = T), ]
res[1:10, ]
NA
par(mfrow = c(4, 4), mar = c(.1, .1, 2, 1))
{
plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16, axes = F, xlab = "", ylab = "")
lines(curves, lwd = 2, col = "black")
points(centroids2d[lc.idx, ], col = "black", pch = 15, cex = 3, xpd = T)
text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white", xpd = T)
}
vars <- rownames(res[1:15, ])
vars <- na.omit(vars[vars != "NA"])
for (i in vars) {
x <- drop0(obj@assays$SCT@data)[i, ]
x <- (x - min(x)) / (max(x) - min(x))
o <- order(x)
plot(obj@reductions$umap@cell.embeddings[o, ],
main = paste0(i), pch = 16, cex = 0.5, axes = F, xlab = "", ylab = "",
col = colorRampPalette(c("lightgray", "grey60", "navy"))(99)[x[o] * 98 + 1]
)
}
res <- na.omit(startVsEndTest(sceGAM, pseudotimeValues = c(0, 1)))
res <- res[res$pvalue < 1e-3, ]
res <- res[res$waldStat > mean(res$waldStat), ]
res <- res[order(res$waldStat, decreasing = T), ]
res[1:10, 1:6]
# Get the top UP and Down regulated in lineage 1
res_lin1 <- sort(setNames(res$logFClineage1, rownames(res)))
vars <- names(c(rev(res_lin1)[1:7], res_lin1[1:8]))
vars <- na.omit(vars[vars != "NA"])
par(mfrow = c(4, 4), mar = c(.1, .1, 2, 1))
{
plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16, axes = F, xlab = "", ylab = "")
lines(curves, lwd = 2, col = "black")
points(centroids2d[lc.idx, ], col = "black", pch = 15, cex = 3, xpd = T)
text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white", xpd = T)
}
for (i in vars) {
x <- drop0(obj@assays$SCT@data)[i, ]
x <- (x - min(x)) / (max(x) - min(x))
o <- order(x)
plot(obj@reductions$umap@cell.embeddings[o, ],
main = paste0(i), pch = 16, cex = 0.5, axes = F, xlab = "", ylab = "",
col = colorRampPalette(c("lightgray", "grey60", "navy"))(99)[x[o] * 98 + 1]
)
}
res <- na.omit(diffEndTest(sceGAM))
res <- res[res$pvalue < 1e-3, ]
res <- res[res$waldStat > mean(res$waldStat), ]
res <- res[order(res$waldStat, decreasing = T), ]
res[1:10, ]
NA
# Get the top UP and Down regulated in lineage 1 vs 2
res_lin1_2 <- sort(setNames(res$logFC1_2, rownames(res)))
vars <- names(c(rev(res_lin1_2)[1:7], res_lin1_2[1:8]))
vars <- na.omit(vars[vars != "NA"])
par(mfrow = c(4, 4), mar = c(.1, .1, 2, 1))
{
plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16, axes = F, xlab = "", ylab = "")
lines(curves, lwd = 2, col = "black")
points(centroids2d[lc.idx, ], col = "black", pch = 15, cex = 3, xpd = T)
text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white", xpd = T)
}
for (i in vars) {
x <- drop0(obj@assays$SCT@data)[i, ]
x <- (x - min(x)) / (max(x) - min(x))
o <- order(x)
plot(obj@reductions$umap@cell.embeddings[o, ],
main = paste0(i), pch = 16, cex = 0.5, axes = F, xlab = "", ylab = "",
col = colorRampPalette(c("lightgray", "grey60", "navy"))(99)[x[o] * 98 + 1]
)
}