Load pbmc

pbmc = readRDS(here("Reports", "meeting_21Jan_2022", "pbmc.rds"))

UMAP

# UMAP
pbmc %>% 
  sample_n(10000) %>% 
  {
    .x=(.)
    ggplot(.x, aes(refUMAP_1, refUMAP_2)) +
      geom_point(aes(fill =predicted.celltype.l2), stroke = 0, shape = 21, size=1, alpha=1 )+
      ggrepel::geom_text_repel(
        data=   get_labels_clusters(
          .x %>% sample_n(100), 
          predicted.celltype.l2,
          refUMAP_1, 
          refUMAP_2
        ) ,
        aes(refUMAP_1, refUMAP_2, label = predicted.celltype.l2), size = 2.5
      ) +
      scale_fill_manual(values= pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe()) +
      guides(fill = guide_legend(override.aes = list(size = 1, alpha=1) ) ) +
      theme_UMAP +
      theme(axis.title.y = element_blank(), legend.position = "right")
  }
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.

Composition

# Composition
pbmc %>% 
  count(sample, predicted.celltype.l2, donor, severity, simple_timepoint) %>% 
  with_groups(c(sample), ~ mutate(.x, proportion = n/sum(n))) %>% 
  ggplot(aes(sample, proportion, fill = predicted.celltype.l2)) +
  geom_bar(position = "stack", stat = "identity") +
  facet_grid(~severity, scale="free_x") +
  scale_fill_manual(values = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe()) +
  multipanel_theme +
  theme(axis.text.x = element_text(angle=90))
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.

Differentia composition

estimates = readRDS(here("Reports", "meeting_21Jan_2022", "sccomp_estimates.rds") )

build_sccomp_boxplot(estimates, pbmc, predicted.celltype.l2)
## tidyseurat says: A data frame is returned for independent data analysis.
## Joining, by = "sample"
## Joining, by = "predicted.celltype.l2"

Study of T cells

# Study of T cells
pbmc %>% 
  {
    .x = (.)
    DefaultAssay(.x) <- 'ADT'
    .x
  } %>% 
  FeaturePlot(features = c("CD4", "CD8"), blend = TRUE, reduction = "ref.umap")

pbmc %>% 
  {
    .x = (.)
    DefaultAssay(.x) <- 'ADT'
    .x
  } %>% 
  FeatureScatter(
    feature1 = "CD4", feature2 = "CD8" , 
    group.by = "predicted.celltype.l2", 
    cols = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe(), pt.size = 0.2
  )
## tidyseurat says: A data frame is returned for independent data analysis.

pbmc %>% 
  {
    .x = (.)
    DefaultAssay(.x) <- 'SCT'
    .x
  } %>% 
  FeatureScatter(
    feature1 = "CD4", feature2 = "CD8A" , 
    group.by = "predicted.celltype.l2", 
    cols = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe(), pt.size = 0.2
  )
## tidyseurat says: A data frame is returned for independent data analysis.

pbmc %>% 
  {
    .x = (.)
    DefaultAssay(.x) <- 'ADT'
    .x
  } %>% 
  VlnPlot(
    features = c("CD4", "CD8"), 
    group.by = "predicted.celltype.l2", 
    cols = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe(), pt.size = 0
  )
## tidyseurat says: A data frame is returned for independent data analysis.

# Cleanup
rm(pbmc)
gc()
##              used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells    9547977  510.0   17574428   938.6   17574428   938.6
## Vcells 1123775361 8573.8 3190488341 24341.5 2507605696 19131.6

CD8

UMAP

pbmc_CD8 = readRDS(here("Reports", "meeting_1Feb_2022", "pbmc_CD8.rds"))

pbmc_CD8 = 
  pbmc_CD8 %>% 
   RunUMAP(
    nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_", 
    spread    = 0.5,min.dist  = 0.01, n.neighbors = 10L
  ) %>% 
  FindClusters( graph.name = "wsnn", algorithm = 3, resolution = 0.3, verbose = FALSE)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 03:18:19 UMAP embedding parameters a = 5.72 b = 0.8109
## 03:18:20 Commencing smooth kNN distance calibration using 1 thread
## 03:18:22 Initializing from normalized Laplacian + noise
## 03:18:23 Commencing optimization for 200 epochs, with 763540 positive edges
## 03:18:53 Optimization finished
pbmc_CD8 %>% 
  DimPlot( reduction = 'wnn.umap', group.by = "seurat_clusters", label = TRUE, repel = TRUE, label.size = 2.5) 

pbmc_CD8 %>% 
  DimPlot( reduction = 'wnn.umap', group.by = "predicted.celltype.l2", label = TRUE, repel = TRUE, label.size = 2.5) 

pbmc_CD8 %>% 
  mutate(weight = RNA.weight - ADT.weight) %>% 
  FeaturePlot(features = "weight", reduction = 'wnn.umap',  label = TRUE, repel = TRUE, label.size = 2.5 ) +
  scale_color_distiller(palette = "Spectral")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cell markers

readRDS(here("Reports", "meeting_1Feb_2022", "pbmc_CD8_markers.rds")) %>% 
  mutate(plot = map2(plot.x, plot.y, ~ {if(!is.null(.x) & !is.null(.y)) {.x / .y} else {.x} })) %>% 
  pull(plot) 
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

Differentil composition

readRDS(here("Reports", "meeting_1Feb_2022", "sccomp_estimates_CD8.rds")) %>% 

build_sccomp_boxplot(pbmc_CD8, seurat_clusters)
## tidyseurat says: A data frame is returned for independent data analysis.
## Joining, by = "sample"
## Joining, by = "seurat_clusters"

Trajectory analyses

pbmc_CD8_slingshot = readRDS(here("Reports", "meeting_1Feb_2022", "pbmc_CD8_slingshot.rds"))

pbmc_CD8_slingshot %>% 
  make_trajectory_plot(
    "diffusion", 
    predicted.celltype.l2, 
    pbmc_CD8 %>% distinct(predicted.celltype.l2, .color) %>% deframe()
  )
## tidyseurat says: A data frame is returned for independent data analysis.
## iteration: 1
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23
## 24
## 25
## 26
## 27
## 28
## 29
## 30
## 31
## 32
## 33
## 34
## 35
## 36
## 37
## 38
## 39
## 40
## 
## Joining, by = ".cell"
## tidyHeatmap says: (once per session) from release 1.2.3 the grouping labels have white background by default. To add color for one-ay grouping specify palette_grouping = list(c("red", "blue"))
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 columns You can control `use_raster` argument by explicitly
## setting TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

pbmc_CD8_slingshot %>% 
  make_trajectory_plot(
    "diffusion", 
    seurat_clusters, 
    pbmc_CD8 %>% distinct(predicted.celltype.l2, CD8_color) %>% deframe()
  )
## tidyseurat says: A data frame is returned for independent data analysis.
## iteration: 1
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23
## 24
## 25
## 26
## 27
## 28
## 29
## 30
## 31
## 32
## 33
## 34
## 35
## 36
## 37
## 38
## 39
## 40
## 
## Joining, by = ".cell"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 columns You can control `use_raster` argument by explicitly
## setting TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

# Cleanup
rm(pbmc_CD8)
gc()
##              used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   10160263  542.7   17574428   938.6   17574428   938.6
## Vcells 1288125003 9827.7 3190488341 24341.5 3190483882 24341.5

CD4

UMAP

pbmc_CD4 = readRDS(here("Reports", "meeting_1Feb_2022", "pbmc_CD4.rds"))

pbmc_CD4 = 
  pbmc_CD4 %>% 
   RunUMAP(
    nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_", 
    spread    = 0.5,min.dist  = 0.01, n.neighbors = 10L
  ) %>% 
  FindClusters( graph.name = "wsnn", algorithm = 3, resolution = 0.3, verbose = FALSE)
## 03:27:58 UMAP embedding parameters a = 5.72 b = 0.8109
## 03:28:00 Commencing smooth kNN distance calibration using 1 thread
## 03:28:03 Initializing from normalized Laplacian + noise
## 03:28:03 Commencing optimization for 200 epochs, with 1047750 positive edges
## 03:28:45 Optimization finished
pbmc_CD4 %>% 
  DimPlot( reduction = 'wnn.umap', group.by = "seurat_clusters", label = TRUE, repel = TRUE, label.size = 2.5) 

pbmc_CD4 %>% 
  DimPlot( reduction = 'wnn.umap', group.by = "predicted.celltype.l2", label = TRUE, repel = TRUE, label.size = 2.5) 

pbmc_CD4 %>% 
  mutate(weight = RNA.weight - ADT.weight) %>% 
  FeaturePlot(features = "weight", reduction = 'wnn.umap',  label = TRUE, repel = TRUE, label.size = 2.5 ) +
  scale_color_distiller(palette = "Spectral")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cell markers

readRDS("pbmc_CD4_markers.rds") %>% 
  mutate(plot = map2(plot.x, plot.y, ~ {if(!is.null(.x) & !is.null(.y)) {.x / .y} else {.x} })) %>% 
  pull(plot) 
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

Differentil composition

readRDS(here("Reports", "meeting_1Feb_2022", "sccomp_estimates_CD4.rds")) %>% 

build_sccomp_boxplot(pbmc_CD4, seurat_clusters)
## tidyseurat says: A data frame is returned for independent data analysis.
## Joining, by = "sample"
## Joining, by = "seurat_clusters"

Trajectory analyses

Slingshot on UMAP

pbmc_CD4_slingshot_UMAP  = 
  pbmc_CD4 %>% 
  RunUMAP(
    nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_", 
    spread    = 0.5,min.dist  = 0.01, n.neighbors = 10L
  ) %>% 
  FindClusters( graph.name = "wsnn", algorithm = 3, resolution = 0.3, verbose = FALSE) %>% 
  as.SingleCellExperiment() %>% 
  slingshot::slingshot(
    clusterLabels = pull(., seurat_clusters), 
    reducedDim = "WNN.UMAP",
    start.clus= '0', end.clus = c("5", "2" )
  )

saveRDS(pbmc_CD4_slingshot_UMAP , "Reports/meeting_1Feb_2022/pbmc_CD4_slingshot_UMAP.rds")
pbmc_CD4_slingshot_UMAP = readRDS( here("Reports", "meeting_1Feb_2022", "pbmc_CD4_slingshot_UMAP.rds"))

plot(reducedDims(pbmc_CD4_slingshot_UMAP)$WNN.UMAP, col = friendly_cols[as.numeric(as.factor(pbmc_CD4_slingshot_UMAP$`seurat_clusters`))])
lines(SlingshotDataSet(pbmc_CD4_slingshot_UMAP), lwd=2, col='black')

TSCAN

library(scater)
## Loading required package: scuttle
library(TSCAN)
centroids = 
  pbmc_CD4_slingshot_UMAP %>% 
  as_tibble() %>%
  with_groups(seurat_clusters, ~ .x %>% summarise(wnnUMAP_1 = mean(wnnUMAP_1), wnnUMAP_2 = mean(wnnUMAP_2))) %>% 
  tidybulk::as_matrix(rownames = seurat_clusters) 

mst <- centroids %>%
  createClusterMST(clusters=NULL)

line.data <- reportEdges(centroids, mst=mst, clusters=NULL)

pbmc_CD4_slingshot_UMAP %>% 
  ggplot() +
  geom_point(aes(wnnUMAP_1 , wnnUMAP_2, color=factor(seurat_clusters))) +
  geom_line(data=line.data, mapping=aes(x= wnnUMAP_1, y= wnnUMAP_2, group=edge))

# Cleanup
rm(pbmc_CD4)
gc()
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   10880266   581.1   17574428   938.6   17574428   938.6
## Vcells 2470971765 18852.1 5606967912 42777.8 4883425329 37257.6