5. Exploring the data
vars <- c("CD34", "TIGIT", "NKG7", "GNLY", "IFNG", "PRF1", "GZMB", "IL4", "IL13", "IL2RA", "LAG3")
pl <- list()
pl <- list(DimPlot(obj, group.by = "seurat_clusters", label = T) + theme_void() + NoLegend())
for (i in vars) {
pl[[i]] <- FeaturePlot(obj, features = i, order = T) + theme_void() + NoLegend()
}
wrap_plots(pl)

NA
NA
6. compute the lineages on these dataset
# Define lineage ends
ENDS <- c("2", "6", "8")
set.seed(1)
lineages <- as.SlingshotDataSet(getLineages(
data = obj@reductions$umap@cell.embeddings,
clusterLabels = obj$seurat_clusters,
dist.method = "mnn", # It can be: "simple", "scaled.full", "scaled.diag", "slingshot" or "mnn"
end.clus = ENDS, # You can also define the ENDS!
start.clus = "3"
)) # define where to START the trajectories
# IF NEEDED, ONE CAN ALSO MANULALLY EDIT THE LINEAGES, FOR EXAMPLE:
# sel <- sapply( lineages@lineages, function(x){rev(x)[1]} ) %in% ENDS
# lineages@lineages <- lineages@lineages[ sel ]
# names(lineages@lineages) <- paste0("Lineage",1:length(lineages@lineages))
# lineages
# Change the reduction to our "fixed" UMAP2d (FOR VISUALISATION ONLY)
lineages@reducedDim <- obj@reductions$umap@cell.embeddings
{
plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16)
lines(lineages, lwd = 1, col = "black", cex = 2)
text(centroids2d, labels = rownames(centroids2d), cex = 0.8, font = 2, col = "white")
}

Defining Principal Curves

compute the differentiation pseudotime
pseudotime <- slingPseudotime(curves, na = FALSE)
cellWeights <- slingCurveWeights(curves)
x <- rowMeans(pseudotime)
x <- x / max(x)
o <- order(x)
{
plot(obj@reductions$umap@cell.embeddings[o, ],
main = paste0("pseudotime"), pch = 16, cex = 0.4, axes = F, xlab = "", ylab = "",
col = colorRampPalette(c("grey70", "orange3", "firebrick", "purple4"))(99)[x[o] * 98 + 1]
)
points(centroids2d, cex = 2.5, pch = 16, col = "#FFFFFF99")
text(centroids2d, labels = levels(obj$seurat_clusters), cex = 1, font = 2)
}

7. Finding differentially expressed genes

plotGeneCount(curves, clusters = obj$seurat_clusters, models = sceGAM)
lineages
class: SlingshotDataSet
lineages: 4
Lineage1: 3 10 1 9 4 0 12 13 6
Lineage2: 3 10 1 9 4 0 7 11 8
Lineage3: 3 10 1 9 2
Lineage4: 3 5
curves: 0
lc <- sapply(lineages@lineages, function(x) {
rev(x)[1]
})
names(lc) <- gsub("Lineage", "L", names(lc))
lc.idx = match(lc, levels(obj$seurat_clusters))
{
plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], pch = 16)
lines(curves, lwd = 2, col = "black")
points(centroids2d[lc.idx, ], col = "black", pch = 16, cex = 4)
text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white")
}

NA
NA
Genes that change with pseudotime
set.seed(8)
res <- na.omit(associationTest(sceGAM, contrastType = "consecutive"))
res <- res[res$pvalue < 1e-3, ]
res <- res[res$waldStat > mean(res$waldStat), ]
res <- res[order(res$waldStat, decreasing = T), ]
res[1:10, ]
NA
We can plot their expression
par(mfrow = c(4, 4), mar = c(.1, .1, 2, 1))
{
plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16, axes = F, xlab = "", ylab = "")
lines(curves, lwd = 2, col = "black")
points(centroids2d[lc.idx, ], col = "black", pch = 15, cex = 3, xpd = T)
text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white", xpd = T)
}
vars <- rownames(res[1:15, ])
vars <- na.omit(vars[vars != "NA"])
for (i in vars) {
x <- drop0(obj@assays$SCT@data)[i, ]
x <- (x - min(x)) / (max(x) - min(x))
o <- order(x)
plot(obj@reductions$umap@cell.embeddings[o, ],
main = paste0(i), pch = 16, cex = 0.5, axes = F, xlab = "", ylab = "",
col = colorRampPalette(c("lightgray", "grey60", "navy"))(99)[x[o] * 98 + 1]
)
}

Genes that change between two pseudotime points
res <- na.omit(startVsEndTest(sceGAM, pseudotimeValues = c(0, 1)))
res <- res[res$pvalue < 1e-3, ]
res <- res[res$waldStat > mean(res$waldStat), ]
res <- res[order(res$waldStat, decreasing = T), ]
res[1:10, 1:6]
identify which genes go up or down. Let’s check lineage 1:
# Get the top UP and Down regulated in lineage 1
res_lin1 <- sort(setNames(res$logFClineage1, rownames(res)))
vars <- names(c(rev(res_lin1)[1:7], res_lin1[1:8]))
vars <- na.omit(vars[vars != "NA"])
par(mfrow = c(4, 4), mar = c(.1, .1, 2, 1))
{
plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16, axes = F, xlab = "", ylab = "")
lines(curves, lwd = 2, col = "black")
points(centroids2d[lc.idx, ], col = "black", pch = 15, cex = 3, xpd = T)
text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white", xpd = T)
}
for (i in vars) {
x <- drop0(obj@assays$SCT@data)[i, ]
x <- (x - min(x)) / (max(x) - min(x))
o <- order(x)
plot(obj@reductions$umap@cell.embeddings[o, ],
main = paste0(i), pch = 16, cex = 0.5, axes = F, xlab = "", ylab = "",
col = colorRampPalette(c("lightgray", "grey60", "navy"))(99)[x[o] * 98 + 1]
)
}

Genes that are different between lineages
res <- na.omit(diffEndTest(sceGAM))
res <- res[res$pvalue < 1e-3, ]
res <- res[res$waldStat > mean(res$waldStat), ]
res <- res[order(res$waldStat, decreasing = T), ]
res[1:10, ]
NA
pairwise comparison between each lineage. Let’s check lineage 1 vs
lineage 2:
# Get the top UP and Down regulated in lineage 1 vs 2
res_lin1_2 <- sort(setNames(res$logFC1_2, rownames(res)))
vars <- names(c(rev(res_lin1_2)[1:7], res_lin1_2[1:8]))
vars <- na.omit(vars[vars != "NA"])
par(mfrow = c(4, 4), mar = c(.1, .1, 2, 1))
{
plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16, axes = F, xlab = "", ylab = "")
lines(curves, lwd = 2, col = "black")
points(centroids2d[lc.idx, ], col = "black", pch = 15, cex = 3, xpd = T)
text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white", xpd = T)
}
for (i in vars) {
x <- drop0(obj@assays$SCT@data)[i, ]
x <- (x - min(x)) / (max(x) - min(x))
o <- order(x)
plot(obj@reductions$umap@cell.embeddings[o, ],
main = paste0(i), pch = 16, cex = 0.5, axes = F, xlab = "", ylab = "",
col = colorRampPalette(c("lightgray", "grey60", "navy"))(99)[x[o] * 98 + 1]
)
}

---
title: "Trajectory inference using Slingshot"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  #rmdformats::readthedown
  html_notebook:
    toc: true
    toc_float: true
    toc_collapsed: true
---

# 1. load libraries
```{r, include=FALSE}
suppressPackageStartupMessages({
  library(Seurat)
  library(plotly)
  options(rgl.printRglwidget = TRUE)
  library(Matrix)
  library(sparseMatrixStats)
  library(slingshot)
  library(tradeSeq)
  library(patchwork)
})


```

# 2. Nice function to easily draw a graph:
```{r, include=FALSE}
# Add graph to the base R graphics plot
draw_graph <- function(layout, graph, lwd = 0.2, col = "grey") {
  res <- rep(x = 1:(length(graph@p) - 1), times = (graph@p[-1] - graph@p[-length(graph@p)]))
  segments(
    x0 = layout[graph@i + 1, 1], x1 = layout[res, 1],
    y0 = layout[graph@i + 1, 2], y1 = layout[res, 2], lwd = lwd, col = col
  )
}
```

# 3. Reading data
```{r}
load("../2025_NewHarmony_Integrated_Files/0-imp_Robj/0-robj/5-Harmony_Integrated_All_samples_Merged_CD4Tcells_final_Resolution_Selected_0.8_ADT_Normalized_cleaned_mt.robj")

# Define some color palette
pal <- c(scales::hue_pal()(8), RColorBrewer::brewer.pal(9, "Set1"), RColorBrewer::brewer.pal(8, "Set2"))
set.seed(1)
pal <- rep(sample(pal, length(pal)), 200)

```


# 4. Trajectory inference using Slingshot
```{r, echo=FALSE}
# obj <-All_samples_Merged
# 
# rm(All_samples_Merged)
# 
# gc()

# Calculate cluster centroids (for plotting the labels later)
mm <- sparse.model.matrix(~ 0 + factor(obj$seurat_clusters))
colnames(mm) <- levels(factor(obj$seurat_clusters))
centroids2d <- as.matrix(t(t(obj@reductions$umap@cell.embeddings) %*% mm) / Matrix::colSums(mm))

```


## Let’s visualize which clusters we have in our dataset:
```{r, fig.height=6, fig.width=10}

vars <- c("Patient_origin", "cell_line", "seurat_clusters", "Phase")
pl <- list()

for (i in vars) {
  pl[[i]] <- DimPlot(obj, group.by = i, label = T) + theme_void() + NoLegend()
}
wrap_plots(pl)

table(obj$seurat_clusters)
```

# 5. Exploring the data
```{r}

vars <- c("CD34", "TIGIT", "NKG7", "GNLY", "IFNG", "PRF1", "GZMB", "IL4", "IL13", "IL2RA", "LAG3")
pl <- list()

pl <- list(DimPlot(obj, group.by = "seurat_clusters", label = T) + theme_void() + NoLegend())
for (i in vars) {
  pl[[i]] <- FeaturePlot(obj, features = i, order = T) + theme_void() + NoLegend()
}
wrap_plots(pl)


```
# 6. compute the lineages on these dataset
```{r}
# Define lineage ends
ENDS <- c("2", "6", "8")

set.seed(1)
lineages <- as.SlingshotDataSet(getLineages(
  data           = obj@reductions$umap@cell.embeddings,
  clusterLabels  = obj$seurat_clusters,
  dist.method    = "mnn", # It can be: "simple", "scaled.full", "scaled.diag", "slingshot" or "mnn"
  end.clus       = ENDS, # You can also define the ENDS!
  start.clus     = "3"
)) # define where to START the trajectories


# IF NEEDED, ONE CAN ALSO MANULALLY EDIT THE LINEAGES, FOR EXAMPLE:
# sel <- sapply( lineages@lineages, function(x){rev(x)[1]} ) %in% ENDS
# lineages@lineages <- lineages@lineages[ sel ]
# names(lineages@lineages) <- paste0("Lineage",1:length(lineages@lineages))
# lineages


# Change the reduction to our "fixed" UMAP2d (FOR VISUALISATION ONLY)
lineages@reducedDim <- obj@reductions$umap@cell.embeddings

{
  plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16)
  lines(lineages, lwd = 1, col = "black", cex = 2)
  text(centroids2d, labels = rownames(centroids2d), cex = 0.8, font = 2, col = "white")
}

```

##  Defining Principal Curves
```{r}

# Define curves
curves <- as.SlingshotDataSet(getCurves(
  data          = lineages,
  thresh        = 1e-1,
  stretch       = 1e-1,
  allow.breaks  = F,
  approx_points = 100
))


# Plots
{
  plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], pch = 16)
  lines(curves, lwd = 2, col = "black")
  text(centroids2d, labels = levels(obj$seurat_clusters), cex = 1, font = 2)
}
```

##  compute the differentiation pseudotime
```{r}

pseudotime <- slingPseudotime(curves, na = FALSE)
cellWeights <- slingCurveWeights(curves)

x <- rowMeans(pseudotime)
x <- x / max(x)
o <- order(x)

{
  plot(obj@reductions$umap@cell.embeddings[o, ],
    main = paste0("pseudotime"), pch = 16, cex = 0.4, axes = F, xlab = "", ylab = "",
    col = colorRampPalette(c("grey70", "orange3", "firebrick", "purple4"))(99)[x[o] * 98 + 1]
  )
  points(centroids2d, cex = 2.5, pch = 16, col = "#FFFFFF99")
  text(centroids2d, labels = levels(obj$seurat_clusters), cex = 1, font = 2)
}
```
# 7. Finding differentially expressed genes
```{r}

sel_cells <- split(colnames(obj@assays$SCT@data), obj$seurat_clusters)
sel_cells <- unlist(lapply(sel_cells, function(x) {
  set.seed(1)
  return(sample(x, 20))
}))

gv <- as.data.frame(na.omit(scran::modelGeneVar(obj@assays$SCT@data[, sel_cells])))
gv <- gv[order(gv$bio, decreasing = T), ]
sel_genes <- sort(rownames(gv)[1:500])

path_file <- "/home/bioinfo/data/trajectory/seurat_scegam.rds"

# fetch_data is defined at the top of this document
sceGAM <- fitGAM(
  counts = drop0(obj@assays$SCT@data[sel_genes, sel_cells]),
  pseudotime = pseudotime[sel_cells, ],
  cellWeights = cellWeights[sel_cells, ],
  nknots = 5, verbose = TRUE, parallel = TRUE, sce = TRUE,
  BPPARAM = BiocParallel::MulticoreParam()
)



plotGeneCount(curves, clusters = obj$seurat_clusters, models = sceGAM)


lineages

lc <- sapply(lineages@lineages, function(x) {
  rev(x)[1]
})
names(lc) <- gsub("Lineage", "L", names(lc))
lc.idx = match(lc, levels(obj$seurat_clusters))

{
  plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], pch = 16)
  lines(curves, lwd = 2, col = "black")
  points(centroids2d[lc.idx, ], col = "black", pch = 16, cex = 4)
  text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white")
}




```








## Genes that change with pseudotime
```{r}

set.seed(8)
res <- na.omit(associationTest(sceGAM, contrastType = "consecutive"))
res <- res[res$pvalue < 1e-3, ]
res <- res[res$waldStat > mean(res$waldStat), ]
res <- res[order(res$waldStat, decreasing = T), ]
res[1:10, ]

```


## We can plot their expression
```{r}

par(mfrow = c(4, 4), mar = c(.1, .1, 2, 1))
{
  plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16, axes = F, xlab = "", ylab = "")
  lines(curves, lwd = 2, col = "black")
  points(centroids2d[lc.idx, ], col = "black", pch = 15, cex = 3, xpd = T)
  text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white", xpd = T)
}

vars <- rownames(res[1:15, ])
vars <- na.omit(vars[vars != "NA"])

for (i in vars) {
  x <- drop0(obj@assays$SCT@data)[i, ]
  x <- (x - min(x)) / (max(x) - min(x))
  o <- order(x)
  plot(obj@reductions$umap@cell.embeddings[o, ],
    main = paste0(i), pch = 16, cex = 0.5, axes = F, xlab = "", ylab = "",
    col = colorRampPalette(c("lightgray", "grey60", "navy"))(99)[x[o] * 98 + 1]
  )
}

```



## Genes that change between two pseudotime points
```{r}
res <- na.omit(startVsEndTest(sceGAM, pseudotimeValues = c(0, 1)))
res <- res[res$pvalue < 1e-3, ]
res <- res[res$waldStat > mean(res$waldStat), ]
res <- res[order(res$waldStat, decreasing = T), ]
res[1:10, 1:6]
```
## identify which genes go up or down. Let’s check lineage 1:
```{r}
# Get the top UP and Down regulated in lineage 1
res_lin1 <- sort(setNames(res$logFClineage1, rownames(res)))
vars <- names(c(rev(res_lin1)[1:7], res_lin1[1:8]))
vars <- na.omit(vars[vars != "NA"])

par(mfrow = c(4, 4), mar = c(.1, .1, 2, 1))

{
  plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16, axes = F, xlab = "", ylab = "")
  lines(curves, lwd = 2, col = "black")
  points(centroids2d[lc.idx, ], col = "black", pch = 15, cex = 3, xpd = T)
  text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white", xpd = T)
}

for (i in vars) {
  x <- drop0(obj@assays$SCT@data)[i, ]
  x <- (x - min(x)) / (max(x) - min(x))
  o <- order(x)
  plot(obj@reductions$umap@cell.embeddings[o, ],
    main = paste0(i), pch = 16, cex = 0.5, axes = F, xlab = "", ylab = "",
    col = colorRampPalette(c("lightgray", "grey60", "navy"))(99)[x[o] * 98 + 1]
  )
}
```

## Genes that are different between lineages
```{r}
res <- na.omit(diffEndTest(sceGAM))
res <- res[res$pvalue < 1e-3, ]
res <- res[res$waldStat > mean(res$waldStat), ]
res <- res[order(res$waldStat, decreasing = T), ]
res[1:10, ]

```

## pairwise comparison between each lineage. Let’s check lineage 1 vs lineage 2:
```{r}
# Get the top UP and Down regulated in lineage 1 vs 2
res_lin1_2 <- sort(setNames(res$logFC1_2, rownames(res)))
vars <- names(c(rev(res_lin1_2)[1:7], res_lin1_2[1:8]))
vars <- na.omit(vars[vars != "NA"])

par(mfrow = c(4, 4), mar = c(.1, .1, 2, 1))
{
  plot(obj@reductions$umap@cell.embeddings, col = pal[obj$seurat_clusters], cex = .5, pch = 16, axes = F, xlab = "", ylab = "")
  lines(curves, lwd = 2, col = "black")
  points(centroids2d[lc.idx, ], col = "black", pch = 15, cex = 3, xpd = T)
  text(centroids2d[lc.idx, ], labels = names(lc), cex = 1, font = 2, col = "white", xpd = T)
}

for (i in vars) {
  x <- drop0(obj@assays$SCT@data)[i, ]
  x <- (x - min(x)) / (max(x) - min(x))
  o <- order(x)
  plot(obj@reductions$umap@cell.embeddings[o, ],
    main = paste0(i), pch = 16, cex = 0.5, axes = F, xlab = "", ylab = "",
    col = colorRampPalette(c("lightgray", "grey60", "navy"))(99)[x[o] * 98 + 1]
  )
}
```




