library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.0 ✓ forcats 0.5.1
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(Seurat)
## Registered S3 method overwritten by 'spatstat.geom':
## method from
## print.boxx cli
## Attaching SeuratObject
library(SeuratWrappers)
library(tidyseurat)
## Loading required package: ttservice
## ========================================
## tidyseurat version 0.4.0
## If you use TIDYSEURAT in published research, please cite:
##
## Mangiola et al. Interfacing Seurat with the R tidy universe. Bioinformatics 2021.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(tidyseurat))
##
## To restore the Seurat default display use options("restore_Seurat_show" = TRUE)
## ========================================
##
## Attaching package: 'tidyseurat'
## The following objects are masked from 'package:dplyr':
##
## add_count, bind_cols, bind_rows, count
## The following object is masked from 'package:stats':
##
## filter
library(tidySingleCellExperiment)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:tidyseurat':
##
## count
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:tidyseurat':
##
## rename
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:tidyseurat':
##
## slice
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
##
## Assays
## The following object is masked from 'package:Seurat':
##
## Assays
##
## Attaching package: 'tidySingleCellExperiment'
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:tidyseurat':
##
## add_count, bind_cols, bind_rows, count, join_features,
## join_transcripts, plot_ly, tidy
## The following object is masked from 'package:ttservice':
##
## join_features
## The following object is masked from 'package:SeuratObject':
##
## pbmc_small
## The following objects are masked from 'package:dplyr':
##
## add_count, bind_cols, bind_rows, count
## The following object is masked from 'package:stats':
##
## filter
library(here)
## here() starts at /stornext/Bioinf/data/bioinf-data/Papenfuss_lab/projects/mangiola.s/PostDoc/covid19pbmc
library(sccomp)
## Loading required package: rstan
## Loading required package: StanHeaders
##
## rstan version 2.26.6 (Stan version 2.26.1)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
##
## Attaching package: 'rstan'
## The following object is masked from 'package:tidySingleCellExperiment':
##
## extract
## The following object is masked from 'package:tidyseurat':
##
## extract
## The following object is masked from 'package:tidyr':
##
## extract
library(scales)
## Warning: package 'scales' was built under R version 4.1.2
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Load multipanel_theme
source("https://gist.githubusercontent.com/stemangiola/fc67b08101df7d550683a5100106561c/raw/2a5e64e29f79a1e30405903fec1280df8662eff4/ggplot_theme_multipanel")
source("~/PostDoc/oligo_breast//functions.R")
options(future.globals.maxSize = 50 * 1024 ^ 3) # for 50 Gb RAM
theme_UMAP =
list(
multipanel_theme,
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
)
S_sqrt <- function(x){sign(x)*sqrt(abs(x))}
IS_sqrt <- function(x){x^2*sign(x)}
S_sqrt_trans <- function() trans_new("S_sqrt",S_sqrt,IS_sqrt)
pbmc <-
here("data", "pbmc_RNA_ADT_QC27_arcsinh_CLR_PCA.sPCA.UMAPs.nodoublets.nounknown.annotated.rds") %>%
readRDS() %>%
# Filter doublets and erythrocytes from Peter analysis
inner_join(
here("data", "sce_singlets_no_erythrocytes.rds") %>%
readRDS() %>%
as_tibble() %>%
select(.cell, doublets.proportion, doublets.known, doublets.predicted ),
by = ".cell"
) %>%
# Filter cell types
tidyseurat::add_count(predicted.celltype.l1, predicted.celltype.l2, name = "cell_count") %>%
filter(cell_count>10) %>%
filter(!predicted.celltype.l2 %in% c("Eryth"))
# Color
color_df =
pbmc %>%
distinct(predicted.celltype.l2, predicted.celltype.l1) %>%
nest(data = -predicted.celltype.l1) %>%
mutate(.palette = c( "Reds", "YlOrBr", "Reds", "Purples", "RdPu", "Greens", "Blues", "Greys" )) %>%
unnest(data) %>%
nest(data = -.palette) %>%
mutate(data = map2(
data, .palette,
~ .x %>%
nest(data__ = -predicted.celltype.l2) %>%
mutate(.color = gradient_n_pal(brewer_pal(palette=.y)(9))(seq(0.3, 1, length.out=n())) ) %>%
unnest(data__)
)) %>%
unnest(data)
pbmc = pbmc %>% left_join(color_df)
job::job({ pbmc %>% saveRDS("Reports/meeting_21Jan_2022/pbmc.rds") })
pbmc = here("Reports", "meeting_21Jan_2022", "pbmc.rds") %>% readRDS()
# UMAP
pbmc %>%
sample_n(10000) %>%
{
.x=(.)
ggplot(.x, aes(refUMAP_1, refUMAP_2)) +
geom_point(aes(fill =predicted.celltype.l2), stroke = 0, shape = 21, size=1, alpha=1 )+
ggrepel::geom_text_repel(
data= get_labels_clusters(
.x %>% sample_n(100),
predicted.celltype.l2,
refUMAP_1,
refUMAP_2
) ,
aes(refUMAP_1, refUMAP_2, label = predicted.celltype.l2), size = 2.5
) +
scale_fill_manual(values= pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe()) +
guides(fill = guide_legend(override.aes = list(size = 1, alpha=1) ) ) +
theme_UMAP +
theme(axis.title.y = element_blank(), legend.position = "right")
}
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
# Composition
pbmc %>%
count(sample, predicted.celltype.l2, donor, severity, simple_timepoint) %>%
with_groups(c(sample), ~ mutate(.x, proportion = n/sum(n))) %>%
ggplot(aes(sample, proportion, fill = predicted.celltype.l2)) +
geom_bar(position = "stack", stat = "identity") +
facet_grid(~severity, scale="free_x") +
scale_fill_manual(values = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe()) +
multipanel_theme +
theme(axis.text.x = element_text(angle=90))
## tidyseurat says: A data frame is returned for independent data analysis.
## tidyseurat says: A data frame is returned for independent data analysis.
# Differential composition
estimates =
pbmc %>%
mutate(has_covid = severity %in% c("moderate", "severe")) %>%
sccomp_glm(
~ has_covid, sample,
predicted.celltype.l2,
formula_variability = ~has_covid
)
estimates %>% saveRDS("Reports/meeting_21Jan_2022/sccomp_estimates.rds")
estimates = here("Reports", "meeting_21Jan_2022" , "sccomp_estimates.rds") %>% readRDS()
simulated_proportion =
estimates |>
replicate_data(number_of_draws = 100) |>
left_join(pbmc |> distinct(has_covid, sample))
## tidyseurat says: A data frame is returned for independent data analysis.
## Joining, by = "sample"
calc_boxplot_stat <- function(x) {
coef <- 1.5
n <- sum(!is.na(x))
# calculate quantiles
stats <- quantile(x, probs = c(0.0, 0.25, 0.5, 0.75, 1.0))
names(stats) <- c("ymin", "lower", "middle", "upper", "ymax")
iqr <- diff(stats[c(2, 4)])
# set whiskers
outliers <- x < (stats[2] - coef * iqr) | x > (stats[4] + coef * iqr)
if (any(outliers)) {
stats[c(1, 5)] <- range(c(stats[2:4], x[!outliers]), na.rm = TRUE)
}
return(stats)
}
plot_brca_boxplot =
ggplot() +
stat_summary(
aes(has_covid, generated_proportions),
fun.data = calc_boxplot_stat, geom="boxplot",
fatten = 0.5, lwd=0.2,
data =
simulated_proportion %>%
# Filter uanitles because of limits
inner_join( estimates %>% distinct(predicted.celltype.l2, composition_pH0_has_covidTRUE)) ,
color="blue"
)+
geom_boxplot(
aes(has_covid, proportion, fill=interaction (composition_pH0_has_covidTRUE <0.025 , variability_pH0_has_covidTRUE <0.025 )),
outlier.shape = NA,
fatten = 0.5, lwd=0.2,
data = estimates %>% unnest(count_data) %>% filter(!outlier) %>%
with_groups(sample, ~ mutate(.x, proportion = count /sum(count)))
) +
geom_jitter(
aes(has_covid, (proportion), color=outlier),
size = 0.2,
data =
estimates %>% unnest(count_data) %>%
with_groups(sample, ~ mutate(.x, proportion = count /sum(count))) ,
height = 0
) +
facet_wrap(~ predicted.celltype.l2, scale="free_y", nrow=2) +
scale_y_continuous(trans="S_sqrt", labels = dropLeadingZero) +
scale_color_manual(values = c("black", "#e11f28")) +
scale_fill_manual(values = c("white" , "#b58b4c", "#74a6aa", "#a15259")) +
xlab("Biological condition") +
ylab("Cell-group proportion") +
multipanel_theme +
theme(strip.background =element_rect(fill="white", colour = NA), legend.position = "bottom") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle=30, hjust = 1)
)
## Joining, by = "predicted.celltype.l2"
plot_brca_boxplot
CD4 CD8 antibodies
pbmc %>%
{
.x = (.)
DefaultAssay(.x) <- 'ADT'
.x
} %>%
FeaturePlot(features = c("CD4", "CD8"), blend = TRUE, reduction = "ref.umap")
Gene co-expression/co-transcription
pbmc %>%
{
.x = (.)
DefaultAssay(.x) <- 'ADT'
.x
} %>%
FeatureScatter(
feature1 = "CD4", feature2 = "CD8" ,
group.by = "predicted.celltype.l2",
cols = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe(), pt.size = 0.2
)
## tidyseurat says: A data frame is returned for independent data analysis.
pbmc %>%
{
.x = (.)
DefaultAssay(.x) <- 'SCT'
.x
} %>%
FeatureScatter(
feature1 = "CD4", feature2 = "CD8A" ,
group.by = "predicted.celltype.l2",
cols = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe(), pt.size = 0.2
)
## tidyseurat says: A data frame is returned for independent data analysis.
pbmc %>%
{
.x = (.)
DefaultAssay(.x) <- 'ADT'
.x
} %>%
VlnPlot(
features = c("CD4", "CD8"),
group.by = "predicted.celltype.l2",
cols = pbmc %>% distinct(predicted.celltype.l2, .color) %>% deframe(), pt.size = 0
)
## tidyseurat says: A data frame is returned for independent data analysis.