library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.1.2

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.0     ✓ forcats 0.5.1

## Warning: package 'purrr' was built under R version 4.1.2

## Warning: package 'dplyr' was built under R version 4.1.2

## Warning: package 'stringr' was built under R version 4.1.2

## Warning: package 'forcats' was built under R version 4.1.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(Seurat)

## Registered S3 method overwritten by 'spatstat.geom':
##   method     from
##   print.boxx cli

## Attaching SeuratObject

library(SeuratWrappers)
library(tidyseurat)

## Loading required package: ttservice

## ========================================
## tidyseurat version 0.4.0
## If you use TIDYSEURAT in published research, please cite:
## 
## Mangiola et al. Interfacing Seurat with the R tidy universe. Bioinformatics 2021.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(tidyseurat))
##   
## To restore the Seurat default display use options("restore_Seurat_show" = TRUE) 
## ========================================

## 
## Attaching package: 'tidyseurat'

## The following objects are masked from 'package:dplyr':
## 
##     add_count, bind_cols, bind_rows, count

## The following object is masked from 'package:stats':
## 
##     filter

library(tidySingleCellExperiment)

## Loading required package: SingleCellExperiment

## Loading required package: SummarizedExperiment

## Loading required package: MatrixGenerics

## Loading required package: matrixStats

## 
## Attaching package: 'matrixStats'

## The following object is masked from 'package:tidyseurat':
## 
##     count

## The following object is masked from 'package:dplyr':
## 
##     count

## 
## Attaching package: 'MatrixGenerics'

## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars

## Loading required package: GenomicRanges

## Loading required package: stats4

## Loading required package: BiocGenerics

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min

## Loading required package: S4Vectors

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:tidyseurat':
## 
##     rename

## The following objects are masked from 'package:dplyr':
## 
##     first, rename

## The following object is masked from 'package:tidyr':
## 
##     expand

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

## Loading required package: IRanges

## 
## Attaching package: 'IRanges'

## The following object is masked from 'package:tidyseurat':
## 
##     slice

## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice

## The following object is masked from 'package:purrr':
## 
##     reduce

## Loading required package: GenomeInfoDb

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## 
## Attaching package: 'Biobase'

## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians

## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians

## 
## Attaching package: 'SummarizedExperiment'

## The following object is masked from 'package:SeuratObject':
## 
##     Assays

## The following object is masked from 'package:Seurat':
## 
##     Assays

## 
## Attaching package: 'tidySingleCellExperiment'

## The following object is masked from 'package:IRanges':
## 
##     slice

## The following object is masked from 'package:S4Vectors':
## 
##     rename

## The following object is masked from 'package:matrixStats':
## 
##     count

## The following objects are masked from 'package:tidyseurat':
## 
##     add_count, bind_cols, bind_rows, count, join_features,
##     join_transcripts, plot_ly, tidy

## The following object is masked from 'package:ttservice':
## 
##     join_features

## The following object is masked from 'package:SeuratObject':
## 
##     pbmc_small

## The following objects are masked from 'package:dplyr':
## 
##     add_count, bind_cols, bind_rows, count

## The following object is masked from 'package:stats':
## 
##     filter

library(here)

## here() starts at /stornext/Bioinf/data/bioinf-data/Papenfuss_lab/projects/mangiola.s/PostDoc/covid19pbmc

library(sccomp)

## Loading required package: rstan

## Loading required package: StanHeaders

## 
## rstan version 2.26.6 (Stan version 2.26.1)

## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)

## 
## Attaching package: 'rstan'

## The following object is masked from 'package:tidySingleCellExperiment':
## 
##     extract

## The following object is masked from 'package:tidyseurat':
## 
##     extract

## The following object is masked from 'package:tidyr':
## 
##     extract

library(scales)

## Warning: package 'scales' was built under R version 4.1.2

## 
## Attaching package: 'scales'

## The following object is masked from 'package:purrr':
## 
##     discard

## The following object is masked from 'package:readr':
## 
##     col_factor

# Load multipanel_theme
source("https://gist.githubusercontent.com/stemangiola/fc67b08101df7d550683a5100106561c/raw/2a5e64e29f79a1e30405903fec1280df8662eff4/ggplot_theme_multipanel")
source("~/PostDoc/oligo_breast//functions.R")

options(future.globals.maxSize = 50 * 1024 ^ 3) # for 50 Gb RAM


theme_UMAP = 
  list(
    multipanel_theme,
    theme(
      axis.text.x  = element_blank(),
      axis.ticks.x =  element_blank(),
      axis.text.y  = element_blank(),
      axis.ticks.y =  element_blank(),
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank()
    ) 
  )

S_sqrt <- function(x){sign(x)*sqrt(abs(x))}
IS_sqrt <- function(x){x^2*sign(x)}
S_sqrt_trans <- function() trans_new("S_sqrt",S_sqrt,IS_sqrt)

Parse data

pbmc <-
  here("data", "pbmc_RNA_ADT_QC27_arcsinh_CLR_PCA.sPCA.UMAPs.nodoublets.nounknown.annotated.rds") %>%
  readRDS()  %>%

  # Filter doublets and erythrocytes from Peter analysis
  inner_join(
    here("data", "sce_singlets_no_erythrocytes.rds") %>%
      readRDS()  %>%
      as_tibble() %>%
      select(.cell, doublets.proportion, doublets.known, doublets.predicted ),
    by = ".cell"
  )  %>%

  # Filter cell types
  tidyseurat::add_count(predicted.celltype.l1, predicted.celltype.l2, name = "cell_count") %>%
  filter(cell_count>10) %>%
  filter(!predicted.celltype.l2 %in% c("Eryth"))

  # Color
color_df =
  pbmc %>%
  distinct(predicted.celltype.l2, predicted.celltype.l1) %>%
  nest(data = -predicted.celltype.l1) %>%
  mutate(.palette = c(  "Reds", "YlOrBr", "Reds", "Purples", "RdPu",   "Greens", "Blues",   "Greys" )) %>%
  unnest(data) %>%
  nest(data = -.palette) %>%
  mutate(data = map2(
    data, .palette,
    ~ .x %>%
      nest(data__ = -predicted.celltype.l2) %>%
      mutate(.color = gradient_n_pal(brewer_pal(palette=.y)(9))(seq(0.3, 1, length.out=n())) ) %>%
      unnest(data__)
  )) %>%
  unnest(data)

pbmc = pbmc %>% left_join(color_df)

job::job({ pbmc %>% saveRDS("Reports/meeting_21Jan_2022/pbmc.rds") })

Cell clusters

pbmc = here("Reports", "meeting_21Jan_2022", "pbmc.rds") %>% readRDS()

# UMAP
pbmc %>% 
  sample_n(10000) %>% 
  {
    .x=(.)
    ggplot(.x, aes(refUMAP_1, refUMAP_2)) +
      geom_point(aes(fill =predicted.celltype.l2), stroke = 0, shape = 21, size=1, alpha=1 )+
      ggrepel::geom_text_repel(
        data=   get_labels_clusters(
          .x %>% sample_n(100), 
          predicted.celltype.l2,
          refUMAP_1, 
          refUMAP_2
        ) ,
        aes(refUMAP_1, refUMAP_2, label = predicted.celltype.l2), size = 2.5
      ) +
      scale_fill_manual(values= pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe()) +
      guides(fill = guide_legend(override.aes = list(size = 1, alpha=1) ) ) +
      theme_UMAP +
      theme(axis.title.y = element_blank(), legend.position = "right")
  }

## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.

## tidyseurat says: A data frame is returned for independent data analysis.

Cell type composition

# Composition
pbmc %>% 
  count(sample, predicted.celltype.l2, donor, severity, simple_timepoint) %>% 
  with_groups(c(sample), ~ mutate(.x, proportion = n/sum(n))) %>% 
  ggplot(aes(sample, proportion, fill = predicted.celltype.l2)) +
  geom_bar(position = "stack", stat = "identity") +
  facet_grid(~severity, scale="free_x") +
  scale_fill_manual(values = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe()) +
  multipanel_theme +
  theme(axis.text.x = element_text(angle=90))

## tidyseurat says: A data frame is returned for independent data analysis.

## tidyseurat says: A data frame is returned for independent data analysis.

Differential composition and variability

# Differential composition
estimates = 
  pbmc %>% 
  mutate(has_covid = severity %in% c("moderate", "severe")) %>% 
  sccomp_glm(
    ~ has_covid, sample, 
    predicted.celltype.l2, 
    formula_variability = ~has_covid
  )

estimates %>% saveRDS("Reports/meeting_21Jan_2022/sccomp_estimates.rds")

estimates = here("Reports", "meeting_21Jan_2022" , "sccomp_estimates.rds") %>% readRDS()



simulated_proportion = 
  estimates |>
  replicate_data(number_of_draws = 100) |>
  left_join(pbmc |> distinct(has_covid, sample))

## tidyseurat says: A data frame is returned for independent data analysis.

## Joining, by = "sample"

calc_boxplot_stat <- function(x) {
  coef <- 1.5
  n <- sum(!is.na(x))
  # calculate quantiles
  stats <- quantile(x, probs = c(0.0, 0.25, 0.5, 0.75, 1.0))
  names(stats) <- c("ymin", "lower", "middle", "upper", "ymax")
  iqr <- diff(stats[c(2, 4)])
  # set whiskers
  outliers <- x < (stats[2] - coef * iqr) | x > (stats[4] + coef * iqr)
  if (any(outliers)) {
    stats[c(1, 5)] <- range(c(stats[2:4], x[!outliers]), na.rm = TRUE)
  }
  return(stats)
}

plot_brca_boxplot =
  ggplot() +
  stat_summary(
    aes(has_covid, generated_proportions),
    fun.data = calc_boxplot_stat, geom="boxplot",
    fatten = 0.5, lwd=0.2,
    data =
      simulated_proportion %>%
      
      # Filter uanitles because of limits
      inner_join( estimates %>% distinct(predicted.celltype.l2, composition_pH0_has_covidTRUE)) ,
    color="blue"
    
  )+
  geom_boxplot(
    aes(has_covid, proportion, fill=interaction (composition_pH0_has_covidTRUE <0.025 , variability_pH0_has_covidTRUE <0.025 )),
    outlier.shape = NA,
    fatten = 0.5, lwd=0.2,
    data = estimates %>% unnest(count_data) %>% filter(!outlier) %>% 
      with_groups(sample, ~ mutate(.x, proportion = count /sum(count))) 
  ) +
  geom_jitter(
    aes(has_covid, (proportion), color=outlier), 
    size = 0.2, 
    data =
      estimates %>% unnest(count_data) %>% 
      with_groups(sample, ~ mutate(.x, proportion = count /sum(count)))  ,
    height = 0
  ) +
  
  facet_wrap(~ predicted.celltype.l2, scale="free_y", nrow=2) +
  
  scale_y_continuous(trans="S_sqrt", labels = dropLeadingZero) +
  
  scale_color_manual(values = c("black", "#e11f28")) +
  scale_fill_manual(values = c("white" , "#b58b4c", "#74a6aa", "#a15259")) +
  xlab("Biological condition") +
  ylab("Cell-group proportion") +
  multipanel_theme +
  theme(strip.background =element_rect(fill="white", colour = NA), legend.position = "bottom")  +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle=30, hjust = 1)
  )

## Joining, by = "predicted.celltype.l2"

plot_brca_boxplot

Study of T cells

CD4 CD8 antibodies

pbmc %>% 
  {
    .x = (.)
    DefaultAssay(.x) <- 'ADT'
    .x
  } %>% 
  FeaturePlot(features = c("CD4", "CD8"), blend = TRUE, reduction = "ref.umap")

Gene co-expression/co-transcription

pbmc %>% 
  {
    .x = (.)
    DefaultAssay(.x) <- 'ADT'
    .x
  } %>% 
  FeatureScatter(
    feature1 = "CD4", feature2 = "CD8" , 
    group.by = "predicted.celltype.l2", 
    cols = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe(), pt.size = 0.2
  )

## tidyseurat says: A data frame is returned for independent data analysis.

pbmc %>% 
  {
    .x = (.)
    DefaultAssay(.x) <- 'SCT'
    .x
  } %>% 
  FeatureScatter(
    feature1 = "CD4", feature2 = "CD8A" , 
    group.by = "predicted.celltype.l2", 
    cols = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe(), pt.size = 0.2
  )

## tidyseurat says: A data frame is returned for independent data analysis.

T cell subtypes

pbmc %>% 
  {
    .x = (.)
    DefaultAssay(.x) <- 'ADT'
    .x
  } %>% 
  VlnPlot(
    features = c("CD4", "CD8"), 
    group.by = "predicted.celltype.l2", 
    cols = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe(), pt.size = 0
  )

## tidyseurat says: A data frame is returned for independent data analysis.

meeting 21 Jan

Stefano Mangiola

1/21/2022

Parse data

Cell clusters

Cell type composition

Differential composition and variability

Study of T cells

T cell subtypes