Set-up

library(here) # paths
## here() starts at U:/Datastore/CMVM/scs/groups/jpriller-GROUP/scRNAseq/R-analysis-young
library(scran) # feature select
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## Loading required package: scuttle
library(batchelor) # correct batch effect
library(scater) # dimred calculation
## Loading required package: ggplot2
library(bluster) # clustering
library(slingshot) # trajectory
## Loading required package: princurve
## Warning: package 'princurve' was built under R version 4.1.3
## Loading required package: TrajectoryUtils
## 
## Attaching package: 'TrajectoryUtils'
## The following object is masked from 'package:scran':
## 
##     createClusterMST
library(ggridges) # density plots
project <- "fire-mice"
source(here("src/colours.R"))

Subset

sce <- readRDS(here("processed", project, "sce_anno_02.RDS"))
oligodendroglia <- c("OPC", "pOPC", "iOligo", "mOligo_1", "mOligo_2", "mOligo_3", 
"mOligo_4")
sce <- sce[,sce$clusters_named %in% oligodendroglia]

# remove unnecessary slots that are not representative anymore
for(dim in reducedDimNames(sce)){
  reducedDim(sce, dim) <- NULL
}
clusters_k <- grep("cluster_k", colnames(colData(sce)), value = T)
clusters_seurat <- grep("originalexp_snn_res",colnames(colData(sce)), value = T)
for(meta in c(clusters_seurat, clusters_k, "cluster_names")){
  colData(sce)[[meta]] <- NULL
}
assay(sce, "reconstructed") <- NULL

As the QC thresholds in “QC_norm_featureselect_dimred_02.Rmd” are already celltype specific, ther is no need to redo a cell QC after subsetting. however some genes might not be expressed in this subsetted dataset, and will be removed for downstream. same threshold as in the past is used, mimimum 10 cells have to express the gene in order to keep it.

keep_feature <- rowSums(counts(sce) > 0) > 10
sce <- sce[keep_feature,]

Feature selection

After subsetting the data the variable genes need to be selected again to better represent the variance in this cell population. We follow the same methods than for our first feature selection, selecting here for the top 2000 genes.

gene_var_df <- modelGeneVar(sce, density.weights = FALSE) # density weights avoids overfitting due to high-abundance genes being HV
gene_var <- metadata(gene_var_df)
plot(gene_var$mean, gene_var$var, xlab= "Mean of log-expression", ylab= "Variance of log-expression")
curve(gene_var$trend(x), lwd=2, add=T, col = "red")

# select hvgs
hvgs <- getTopHVGs(gene_var_df, n=2000)
# save them in the object
rowSubset(sce) <- hvgs

Batch correction

The Dimensional reduction step is skipped as we want to work on the corrected space.

set.seed(100)
sce <- correctExperiments(sce,
  batch = factor(sce$chip),
  subset.row = rowSubset(sce),
  correct.all=TRUE,
  PARAM = FastMnnParam(
  merge.order = 
    list(list("3","5"), list("4","6")),
  d = 23,
  prop.k=0.10
  )
)
## Warning in .eliminate_overlaps(colnames(colData(merged)),
## colnames(colData(original)), : ignoring 'colData' fields with same name as
## 'batchCorrect' output
## Warning in .eliminate_overlaps(colnames(rowData(merged)),
## colnames(rowData(original)), : ignoring 'rowData' fields with same name as
## 'batchCorrect' output
## Warning in .eliminate_overlaps(names(metadata(merged)),
## names(metadata(original)), : ignoring 'metadata' with same name as
## 'batchCorrect' output
# compute dimensional reduction 
set.seed(100)
sce <- runTSNE(sce,  dimred="corrected")

set.seed(100)
sce <- runUMAP(sce, dimred="corrected")

plotReducedDim(sce, "UMAP", colour_by = "chip")

plotReducedDim(sce, "UMAP", colour_by = "clusters_named")

plotReducedDim(sce, "TSNE", colour_by = "clusters_named")

Trajectory analysis with SlingShot

# https://github.com/kstreet13/slingshot/issues/87
reducedDim(sce, "corrected-1") <- reducedDim(sce, "corrected")[,1:22]
# run slingshot
sce <- slingshot(sce, clusterLabels = "clusters_named", reducedDim= "corrected-1", start.clus="OPC")
summary(sce$slingPseudotime_1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   0.256   1.557   1.058   1.615   1.880    4121
# as there are 3 trajectory lineages a mean between them can be calucluated
sce$slingPseudotimes <- rowMeans(cbind(sce$slingPseudotime_1, sce$slingPseudotime_2, sce$slingPseudotime_3), na.rm = TRUE)

plotReducedDim(sce, "UMAP", colour_by = "slingPseudotimes") 

# plot the lineages
cols <- pals::viridis(100)[cut(sce$slingPseudotimes, breaks = 100)]
plot(reducedDims(sce)$corrected, col = cols, ylab = "PCA 2", xlab = "PCA 1")
lines(SlingshotDataSet(sce), lwd=2, col='black', type = "lineages")

# plot the smooth lines
cols <- pals::viridis(100)[cut(sce$slingPseudotimes, breaks = 100)]
plot(reducedDims(sce)$corrected, col = cols, ylab = "PCA 2", xlab = "PCA 1")
lines(SlingshotDataSet(sce), lwd=2, col='black')

# map back to what they correspond
plotReducedDim(sce, "corrected", colour_by= "clusters_named")  + ylab ("PCA 2") + xlab("PCA 1")

# lineages
slingLineages(SlingshotDataSet(sce))
## $Lineage1
## [1] "OPC"      "iOligo"   "mOligo_4" "mOligo_1" "mOligo_3"
## 
## $Lineage2
## [1] "OPC"      "iOligo"   "mOligo_4" "mOligo_1" "mOligo_2"
## 
## $Lineage3
## [1] "OPC"  "pOPC"

Two lineages are found (+ going to the pOPCs). They go through oligo4.

Map the trajectory back to the UMAP. this is the “smooth curve”option. Solution from Slingshot pdf. It looks better on the other embedding

po <- embedCurves(sce, "UMAP")
plot(reducedDims(sce)$UMAP, col = cols, asp = 1)
lines(SlingshotDataSet(po), lwd = 3)

# extract information from slingshot object
embed <- embedCurves(sce, "UMAP")
embed <- slingCurves(embed)[[1]] 
embed <- data.frame(embed$s[embed$ord, ])

# i need to figure out how to plot the lineages
plotUMAP(sce, colour_by = "slingPseudotime_1") +
  geom_path(data = embed, aes(x = Dim.1, y = Dim.2), size = 1.2)

# lineages
slingLineages(SlingshotDataSet(sce))
# specific cluster for KO is marked as a transitory stage.
# This can be modified to plot plus or less lines! ###
# loop over the paths and add each one separately.
gg <- plotReducedDim(sce, dimred = "corrected", colour_by="slingPseudotimes")
#embedded <- embedCurves(sce, "corrected")
embedded <- SlingshotDataSet(sce)
embedded <- slingCurves(embedded)
for (path in embedded) {
    embedded_l <- data.frame(path$s[path$ord,])
    gg <- gg + geom_path(data=embedded_l, aes(x=Dim.1, y=Dim.2), size=1.2)
}

gg

# From OSCA
sce$slingPseudotimes <- rowMeans(cbind(sce$slingPseudotime_1, sce$slingPseudotime_2), na.rm = TRUE)
# Need to loop over the paths and add each one separately.
gg <- plotUMAP(sce, colour_by="slingPseudotimes")
embedded <- embedCurves(sce, "UMAP")
embedded <- slingCurves(embedded)
for (path in embedded) {
    embedded_l <- data.frame(path$s[path$ord,])
    gg <- gg + geom_path(data=embedded_l, aes(x=Dim.1, y=Dim.2), size=1.2)
}

gg

Ok, the plottings are terrible, but the information is there.

Slingshot on the UMAP space.

This is the way to go to get nice embedded lines in the UMAP plot.

sce_umap <- sce
previous <- grep("slingPseudotime", colnames(colData(sce_umap)), value=T)
for(meta in previous){
  colData(sce_umap)[[meta]] <- NULL
}
# run slingshot
sce_umap <- slingshot(sce, clusterLabels = "clusters_named", reducedDim= "UMAP", start.clus="OPC")
summary(sce$slingPseudotime_1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   0.256   1.557   1.058   1.615   1.880    4121
plotReducedDim(sce_umap, "UMAP", colour_by = "slingPseudotime_1") 

# create a new psuedo time,mean of the two paths
sce_umap$slingPseudotimes <- rowMeans(cbind(sce_umap$slingPseudotime_1, sce_umap$slingPseudotime_2, sce_umap$slingPseudotime_3), na.rm = TRUE)
#sce_umap$slingPseudotimes[is.na(sce_umap$slingPseudotimes)] <- min(sce$slingPseudotimes)
cols <- pals::viridis(100)[cut(sce_umap$slingPseudotimes, breaks = 100)]
plot(reducedDims(sce_umap)$UMAP, col = cols)
lines(SlingshotDataSet(sce_umap), lwd=2, col='black', type = "lineages")

#plotReducedDim(sce, "corrected", colour_by= "clusters_named")

Just… no. No. iOligos should be in the path.

I tried another seed for the umap, where the oligo cluster is upside-down, and then the starting cluster is suddenly oligo-3 and then it branches to oligo4, oligo2 and oligo1.

saveRDS(sce, here("processed", project, "oligo_trajectory.rds"))

Plotting density

# Pseudotime densities (by treatment)
ds <- list(FIRE = density(sce$slingPseudotimes[colData(sce)$genotype == "KO"]),
           WT = density(sce$slingPseudotimes[colData(sce)$genotype == "WT"]),
           HET = density(sce$slingPseudotimes[colData(sce)$genotype == "HET"]) )
xlim <- range(c(ds$WT$x, ds$FIRE$x, ds$HET$x))
ylim <- range(c(ds$WT$y, ds$FIRE$y, ds$HET$y))
plot(xlim, ylim, col = "white", xlab = "Pseudotime", ylab = "")
polygon(c(min(ds$WT$x),ds$WT$x,max(ds$WT$x)),
        c(0,ds$WT$y,0), col = alpha(col_wt_het_ko[1], alpha = .5))
polygon(c(min(ds$HET$x),ds$HET$x,max(ds$HET$x)),
        c(0,ds$HET$y,0), col = alpha(col_wt_het_ko[2], alpha = .5))
polygon(c(min(ds$FIRE$x),ds$FIRE$x,max(ds$FIRE$x)),
        c(0,ds$FIRE$y,0), col = alpha(col_wt_het_ko[3], alpha = .5))
legend("topright", legend = c("WT","HET", "FIRE"), 
       fill = alpha(c(col_wt_het_ko[1], col_wt_het_ko[2],col_wt_het_ko[3]), alpha = .5), bty = "n")

# separate
plot(xlim, ylim, col = "white", xlab = "Pseudotime", ylab = "density WT")
polygon(c(min(ds$WT$x),ds$WT$x,max(ds$WT$x)),
        c(0,ds$WT$y,0), col = alpha(col_wt_het_ko[1], alpha = .5))

#legend("topright", legend = c("WT","HET", "FIRE"), 
#       fill = alpha(c(col_wt_het_ko[1], col_wt_het_ko[2],col_wt_het_ko[3]), alpha = .5), bty = "n")


plot(xlim, ylim, col = "white", xlab = "Pseudotime", ylab = "density HET")
polygon(c(min(ds$HET$x),ds$HET$x,max(ds$HET$x)),
        c(0,ds$HET$y,0), col = alpha(col_wt_het_ko[2], alpha = .5))

plot(xlim, ylim, col = "white", xlab = "Pseudotime", ylab = "density FIRE")
polygon(c(min(ds$FIRE$x),ds$FIRE$x,max(ds$FIRE$x)),
        c(0,ds$FIRE$y,0), col = alpha(col_wt_het_ko[3], alpha = .5))

# test for significance Kolmogorov-Smirnov Test (https://kstreet13.github.io/bioc2020trajectories/articles/workshopTrajectories.html)
ks.test(sce$slingPseudotimes[colData(sce)$genotype == "WT"]*100,
        sce$slingPseudotimes[colData(sce)$genotype == "KO"]*100)
## Warning in ks.test(sce$slingPseudotimes[colData(sce)$genotype == "WT"] * : p-
## value will be approximate in the presence of ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  sce$slingPseudotimes[colData(sce)$genotype == "WT"] * 100 and sce$slingPseudotimes[colData(sce)$genotype == "KO"] * 100
## D = 0.13577, p-value < 2.2e-16
## alternative hypothesis: two-sided
ks.test(sce$slingPseudotimes[colData(sce)$genotype == "WT"],
        sce$slingPseudotimes[colData(sce)$genotype == "HET"])
## Warning in ks.test(sce$slingPseudotimes[colData(sce)$genotype == "WT"], : p-
## value will be approximate in the presence of ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  sce$slingPseudotimes[colData(sce)$genotype == "WT"] and sce$slingPseudotimes[colData(sce)$genotype == "HET"]
## D = 0.06219, p-value = 3.775e-15
## alternative hypothesis: two-sided
More information about KS test:

https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test https://stats.stackexchange.com/questions/333892/is-the-kolmogorov-smirnov-test-too-strict-if-the-sample-size-is-large https://stats.stackexchange.com/questions/7771/how-do-i-calculate-the-effect-size-for-the-kolmogorov-smirnov-z-statistic

In this test, the statistic (D) indeed also reflects the effect size. The interpretation is that D is the maximum distance between the two cumulative distributions from our data. A big number, reflects a big distance, big effect; small number small effect. The maximum value is 1.

plot(ecdf(sce$slingPseudotimes[colData(sce)$genotype == "WT"]))
lines(ecdf(sce$slingPseudotimes[colData(sce)$genotype == "KO"]), col="red")

This is how the cumulative distributions look for the FIRE (red) and WT (black). The maximum distance is arround pseudotime (x) 1.5.

“there is a difference, but not sure it is biologically relevant. My first impression as well as yours was”these curves look similar”. The test will be significant with the tiniest of differences as the sample number is so big, as you well pointed and is also mentioned in the link I forwarded about picking noise. So, it is up to our human expertise to decide if the effect size is biologically relevant or not. Then, why not going back to our first human impression which was “these curves look similar”? ” For other possible interpretations go back to email thread.

df <- data.frame(Pseudotime=sce$slingPseudotimes, Genotype=sce$genotype)
ggplot(df,  aes(x=Pseudotime, y = Genotype, fill=Genotype )) + geom_density_ridges(alpha=.7) + scale_fill_manual(values = col_wt_het_ko)
## Picking joint bandwidth of 0.093

ggplot(df,  aes(x=Pseudotime, y = Genotype, fill=stat(x))) + geom_density_ridges_gradient(alpha=.7) + scale_fill_viridis_c(name="Pseudotime")
## Picking joint bandwidth of 0.093

Separate WT and KO

WT

## this would be interesting, but actually it's better to keep the same graph, so we can compare more easily. 
sce_WT <- sce[, sce$genotype == "WT"]
# remove the previous slingshot run
previous <- grep("slingPseudotime", colnames(colData(sce_WT)), value=T)
for(meta in previous){
  colData(sce_WT)[[meta]] <- NULL
}
keep_feature <- rowSums(counts(sce_WT) > 0) > 10
sce_WT <- sce_WT[keep_feature,]
gene_var_df <- modelGeneVar(sce_WT, density.weights = FALSE) # density weights avoids overfitting due to high-abundance genes being HV
gene_var <- metadata(gene_var_df)
plot(gene_var$mean, gene_var$var, xlab= "Mean of log-expression", ylab= "Variance of log-expression")
curve(gene_var$trend(x), lwd=2, add=T, col = "red")
# select hvgs
hvgs <- getTopHVGs(gene_var_df, n=2000)
# save them in the object
rowSubset(sce_WT) <- hvgs


#recompute the corrected graph 

set.seed(100)
sce <- correctExperiments(sce,
  batch = factor(sce$chip),
  subset.row = rowSubset(sce),
  correct.all=TRUE,
  PARAM = FastMnnParam(
  merge.order = 
    list(list("3","5"), list("4","6")),
  d = 23,
  prop.k=0.10
  )
)
# compute dimensional reduction 
set.seed(100)
sce <- runTSNE(sce,  dimred="corrected")

set.seed(100)
sce <- runUMAP(sce, dimred="corrected")

plotReducedDim(sce, "UMAP", colour_by = "chip")
plotReducedDim(sce, "UMAP", colour_by = "clusters_named")
# https://github.com/kstreet13/slingshot/issues/87
reducedDim(sce, "corrected-1") <- reducedDim(sce, "corrected")[,1:22]
sce_WT <- sce[, sce$genotype == "WT"]
# remove the previous slingshot run
previous <- grep("slingPseudotime", colnames(colData(sce_WT)), value=T)
for(meta in previous){
  colData(sce_WT)[[meta]] <- NULL
}
# run slingshot
sce_WT <- slingshot(sce_WT, clusterLabels = "clusters_named", reducedDim= "corrected-1", start.clus="OPC")
summary(sce_WT$slingPseudotime_1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.2349  1.5627  1.0543  1.6157  1.8495    1402
# as there are 3 trajectory lineages a mean between them can be calucluated
sce_WT$slingPseudotimes <- rowMeans(cbind(sce_WT$slingPseudotime_1, sce_WT$slingPseudotime_2, sce_WT$slingPseudotime_3), na.rm = TRUE)
plotReducedDim(sce_WT, "UMAP", colour_by = "slingPseudotimes") 

# plot the lineages
cols <- pals::viridis(100)[cut(sce_WT$slingPseudotimes, breaks = 100)]
plot(reducedDims(sce_WT)$corrected, col = cols)
lines(SlingshotDataSet(sce_WT), lwd=2, col='black', type = "lineages")

# plot the smooth lines
cols <- pals::viridis(100)[cut(sce_WT$slingPseudotimes, breaks = 100)]
plot(reducedDims(sce_WT)$corrected, col = cols)
lines(SlingshotDataSet(sce_WT), lwd=2, col='black')

# lineages
slingLineages(SlingshotDataSet(sce_WT))
## $Lineage1
## [1] "OPC"      "iOligo"   "mOligo_4" "mOligo_1" "mOligo_3"
## 
## $Lineage2
## [1] "OPC"      "iOligo"   "mOligo_4" "mOligo_1" "mOligo_2"
## 
## $Lineage3
## [1] "OPC"  "pOPC"
# map back to what they correspond
plotReducedDim(sce_WT, "corrected", colour_by= "clusters_named") + ylab ("PCA 2") + xlab("PCA 1") +  guides(color=guide_legend(title="Cluster"))

# plot the corrected dimension with pseudotime colour
gg <- plotReducedDim(sce_WT, dimred = "corrected", colour_by="slingPseudotimes") +
   ylab ("PCA 2") + xlab("PCA 1") + guides(colour= guide_colorbar(guide_legend("Pseudotime")))
# extract lines
embedded <- SlingshotDataSet(sce_WT)
embedded <- slingCurves(embedded)
# remove the first curve - it is confusing to see these two trajectories that are so close to each other in the representation
embedded <- embedded[2:3]
for (path in embedded) {
    embedded_l <- data.frame(path$s[path$ord,])
    gg <- gg + geom_path(data=embedded_l, aes(x=Dim.1, y=Dim.2), size=1.2)
}

gg

KO

sce_KO <- sce[, sce$genotype == "KO"]
# remove the previous slingshot run
previous <- grep("slingPseudotime", colnames(colData(sce_KO)), value=T)
for(meta in previous){
  colData(sce_KO)[[meta]] <- NULL
}
# run slingshot
sce_KO <- slingshot(sce_KO, clusterLabels = "clusters_named", reducedDim= "corrected-1", start.clus="OPC")
summary(sce_KO$slingPseudotime_1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.2344  1.5221  1.0870  1.6021  1.8995     248
# as there are 2 trajectory lineages a mean between them can be calucluated
sce_KO$slingPseudotimes <- rowMeans(cbind(sce_KO$slingPseudotime_1, sce_KO$slingPseudotime_2), na.rm = TRUE)
plotReducedDim(sce_KO, "UMAP", colour_by = "slingPseudotimes") 

# plot the lineages
cols <- pals::viridis(100)[cut(sce_KO$slingPseudotimes, breaks = 100)]
plot(reducedDims(sce_KO)$corrected, col = cols, ylab = "PCA 2", xlab = "PCA 1")
lines(SlingshotDataSet(sce_KO), lwd=2, col='black', type = "lineages")

# plot the smooth lines
cols <- pals::viridis(100)[cut(sce_KO$slingPseudotimes, breaks = 100)]
plot(reducedDims(sce_KO)$corrected, col = cols, ylab = "PCA 2", xlab = "PCA 1")
lines(SlingshotDataSet(sce_KO), lwd=2, col='black')

# lineages
slingLineages(SlingshotDataSet(sce_KO)) 
## $Lineage1
## [1] "OPC"      "iOligo"   "mOligo_4" "mOligo_3" "mOligo_1" "mOligo_2"
## 
## $Lineage2
## [1] "OPC"  "pOPC"
# map back to what they correspond
plotReducedDim(sce_KO, "corrected", colour_by= "clusters_named") + ylab ("PCA 2") + xlab("PCA 1") +  guides(color=guide_legend(title="Cluster"))

# plot the corrected dimension with pseudotime colour
gg <- plotReducedDim(sce_KO, dimred = "corrected", colour_by="slingPseudotimes") +
   ylab ("PCA 2") + xlab("PCA 1") + guides(colour= guide_colorbar(guide_legend("Pseudotime")))
# extract lines
embedded <- SlingshotDataSet(sce_KO)
embedded <- slingCurves(embedded)
for (path in embedded) {
    embedded_l <- data.frame(path$s[path$ord,])
    gg <- gg + geom_path(data=embedded_l, aes(x=Dim.1, y=Dim.2), size=1.2)
}

gg
Click to expand
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252 
## [2] LC_CTYPE=English_United Kingdom.1252   
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C                           
## [5] LC_TIME=English_United Kingdom.1252    
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] pals_1.7                    ggridges_0.5.3             
##  [3] slingshot_2.0.0             TrajectoryUtils_1.0.0      
##  [5] princurve_2.1.6             bluster_1.2.1              
##  [7] scater_1.20.1               ggplot2_3.3.5              
##  [9] batchelor_1.8.1             scran_1.20.1               
## [11] scuttle_1.2.1               SingleCellExperiment_1.14.1
## [13] SummarizedExperiment_1.22.0 Biobase_2.52.0             
## [15] GenomicRanges_1.44.0        GenomeInfoDb_1.28.4        
## [17] IRanges_2.26.0              S4Vectors_0.30.2           
## [19] BiocGenerics_0.38.0         MatrixGenerics_1.4.3       
## [21] matrixStats_0.61.0          here_1.0.1                 
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7              RcppAnnoy_0.0.19         
##  [3] rprojroot_2.0.2           tools_4.1.1              
##  [5] bslib_0.3.1               utf8_1.2.2               
##  [7] R6_2.5.1                  irlba_2.3.3              
##  [9] ResidualMatrix_1.2.0      vipor_0.4.5              
## [11] uwot_0.1.10               DBI_1.1.1                
## [13] colorspace_2.0-2          withr_2.4.2              
## [15] gridExtra_2.3             tidyselect_1.1.1         
## [17] compiler_4.1.1            BiocNeighbors_1.10.0     
## [19] DelayedArray_0.18.0       labeling_0.4.2           
## [21] sass_0.4.0                scales_1.1.1             
## [23] stringr_1.4.0             digest_0.6.28            
## [25] rmarkdown_2.11            XVector_0.32.0           
## [27] dichromat_2.0-0           pkgconfig_2.0.3          
## [29] htmltools_0.5.2           sparseMatrixStats_1.4.2  
## [31] highr_0.9                 maps_3.4.0               
## [33] fastmap_1.1.0             limma_3.48.3             
## [35] rlang_0.4.12              DelayedMatrixStats_1.14.3
## [37] farver_2.1.0              jquerylib_0.1.4          
## [39] generics_0.1.1            jsonlite_1.7.2           
## [41] BiocParallel_1.26.2       dplyr_1.0.7              
## [43] RCurl_1.98-1.5            magrittr_2.0.1           
## [45] BiocSingular_1.8.1        GenomeInfoDbData_1.2.6   
## [47] Matrix_1.3-4              Rcpp_1.0.7               
## [49] ggbeeswarm_0.6.0          munsell_0.5.0            
## [51] fansi_0.5.0               viridis_0.6.2            
## [53] lifecycle_1.0.1           stringi_1.7.5            
## [55] yaml_2.2.1                edgeR_3.34.1             
## [57] zlibbioc_1.38.0           Rtsne_0.15               
## [59] plyr_1.8.6                grid_4.1.1               
## [61] dqrng_0.3.0               crayon_1.4.2             
## [63] lattice_0.20-45           cowplot_1.1.1            
## [65] beachmat_2.8.1            mapproj_1.2.7            
## [67] locfit_1.5-9.4            metapod_1.0.0            
## [69] knitr_1.36                pillar_1.6.4             
## [71] igraph_1.2.7              codetools_0.2-18         
## [73] ScaledMatrix_1.0.0        glue_1.4.2               
## [75] evaluate_0.14             vctrs_0.3.8              
## [77] gtable_0.3.0              purrr_0.3.4              
## [79] assertthat_0.2.1          xfun_0.27                
## [81] rsvd_1.0.5                RSpectra_0.16-0          
## [83] viridisLite_0.4.0         tibble_3.1.5             
## [85] beeswarm_0.4.0            cluster_2.1.2            
## [87] statmod_1.4.36            ellipsis_0.3.2