integrated_sample_annotated <- readRDS("PPCG_deconvolution_signatures_single_cell_PROCESSED/integrated_sample_annotated.rds")
Number of cell types in the intergated datasets
integrated_sample_annotated |>
count(dataset)
## tidyseurat says: A data frame is returned for independent data analysis.
## # A tibble: 5 × 2
## dataset n
## <chr> <int>
## 1 EGAS00001005115 7575
## 2 EGAS00001005787 13007
## 3 GSE137829 19050
## 4 GSE141445 35071
## 5 GSE176031 19339
Number of samples in each dataset
integrated_sample_annotated |>
distinct(dataset, sample) |>
count(dataset)
## tidyseurat says: A data frame is returned for independent data analysis.
## # A tibble: 5 × 2
## dataset n
## <chr> <int>
## 1 EGAS00001005115 1
## 2 EGAS00001005787 16
## 3 GSE137829 6
## 4 GSE141445 13
## 5 GSE176031 15
Integrated UMAP coloursed by dataset
integrated_sample_annotated |>
DimPlot(group.by = "dataset", shuffle = TRUE) +
theme(legend.position = "bottom")
Integrated UMAP coloursed by sample
integrated_sample_annotated |>
DimPlot(group.by = "sample", shuffle = TRUE) +
theme(legend.position = "bottom")
integrated_sample_annotated |>
DimPlot(group.by = "macrocluster", shuffle = TRUE) +
theme(legend.position = "bottom")
integrated_sample_annotated |>
FeaturePlot(
features = c("CD14", "FCGR3A", "CD79A", "CD3G", "EPCAM", "VIM", "PLVAP","CD68", "COL14A1", "KRT17"),
min.cutoff = 'q9',
order = TRUE
)
integrated_sample_annotated |>
DimPlot( group.by = "if_tumor", shuffle = TRUE ) +
scale_color_brewer(palette="Set1")
composition_b1000 <- readRDS("/stornext/Bioinf/data/bioinf-data/Papenfuss_lab/projects/mangiola.s/PostDoc/PPCG_tumour_microenvironment/composition_b1000.rds")
composition_pc_pseudobulk <- readRDS("/stornext/Bioinf/data/bioinf-data/Papenfuss_lab/projects/mangiola.s/PostDoc/PPCG_tumour_microenvironment/composition_pc_pseudobulk.rds")
counts <-
readRDS("PPCG_RNA_count/PPCG_RNAseq_RawCounts_Normalized_SampleAnnot_GenesDetail_1307222.rds") |>
extract(.feature, "symbol", ".+\\|\\|(.+)", remove = FALSE)
Bar plot of bulk-based deconvolution
composition_b1000 |>
left_join(
counts |> pivot_sample()
) |>
pivot_longer(
names_to= c("method", "cell_type"),
values_to = "proportion",
cols=contains("__"),
names_sep ="__"
) %>%
ggplot(aes(x = .sample, y = proportion, fill = cell_type)) +
geom_bar(stat = "identity") +
facet_wrap(method~Center, scale="free", nrow=1) +
guides(fill=guide_legend(ncol=10)) +
theme_multipanel +
theme(legend.position = "bottom", axis.text.x = element_blank())
## Joining, by = ".sample"
Bar plot of single-cell-based deconvolution
composition_pc_pseudobulk |>
left_join(
counts |> pivot_sample()
) |>
pivot_longer(
names_to= c("method", "cell_type"),
values_to = "proportion",
cols=contains("__"),
names_sep ="__"
) %>%
ggplot(aes(x = .sample, y = proportion, fill = cell_type)) +
geom_bar(stat = "identity") +
facet_wrap(method~Center, scale="free", nrow=1) +
guides(fill=guide_legend(ncol=10)) +
theme_multipanel +
theme(legend.position = "bottom", axis.text.x = element_blank())
## Joining, by = ".sample"
dropLeadingZero <- function(l){ stringr::str_replace(l, '0(?=.)', '') }
S_sqrt <- function(x){sign(x)*sqrt(abs(x))}
IS_sqrt <- function(x){x^2*sign(x)}
S_sqrt_trans <- function() scales::trans_new("S_sqrt",S_sqrt,IS_sqrt)
left_join(
composition_pc_pseudobulk |>
pivot_longer(-.sample, values_to = "pc_pseudobulk") |>
mutate(name = name |> str_remove("pc_pseudobulk__")) ,
composition_b1000 |>
pivot_longer(-.sample, values_to = "b1000") |>
mutate(name = name |> str_remove("b1000__"))
) |>
nest(data = -name) |>
mutate(plot = map2(
data, name,
~ .x |>
ggplot(aes(pc_pseudobulk, b1000)) +
geom_point(size=0.2) +
geom_abline(intercept = 0, slope = 1, color="red") +
ggtitle(.y) +
scale_y_continuous(trans=S_sqrt_trans(), labels = dropLeadingZero) +
scale_x_continuous(trans=S_sqrt_trans(), labels = dropLeadingZero) +
theme_multipanel
)) |>
pull(plot) |>
wrap_plots()
## Joining, by = c(".sample", "name")
## Warning: Removed 1757 rows containing missing values (`geom_point()`).
ggMarginal(
composition_b1000 |>
left_join(
counts |> pivot_sample()
) |>
ggplot(aes(purity.scores, b1000__epithelial, color = Center)) +
geom_point(size=0.2) +
geom_smooth(method="lm")+
geom_abline(linetype="dashed", alpha=0.3) +
theme_multipanel,
groupFill = TRUE,
type = "boxplot"
)
## Joining, by = ".sample"
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Deconvolution (cibersort) also depends on the resolution. Ideally this should not be the case. Here we compare the bulk-based deconvolution (B1000 reference) with specific cell types, and sum their proportions, with generic cell types.
composition_b1000_high_resolution =
readRDS("composition_b1000_high_resolution.rds") |>
rowwise() |>
mutate(
b1000_high_resolution__T =
b1000_high_resolution__t_CD4_memory_central + b1000_high_resolution__t_CD4_memory_effector +
b1000_high_resolution__t_CD8_memory_central + b1000_high_resolution__t_CD8_memory_effector +
b1000_high_resolution__t_CD8_naive + b1000_high_resolution__t_gamma_delta +
b1000_high_resolution__t_helper_h1 +
b1000_high_resolution__t_helper_h17 + b1000_high_resolution__t_helper_h2+
b1000_high_resolution__t_helper_naive + b1000_high_resolution__t_reg,
b1000_high_resolution__B = b1000_high_resolution__b_memory + b1000_high_resolution__b_naive,
b1000_high_resolution__monocyte = b1000_high_resolution__dendritic_myeloid_immature + b1000_high_resolution__dendritic_myeloid_mature + b1000_high_resolution__macrophage_M1 + b1000_high_resolution__macrophage_M2 + b1000_high_resolution__monocyte
) |>
ungroup() |>
select(.sample, b1000_high_resolution__T, b1000_high_resolution__B, b1000_high_resolution__monocyte, b1000_high_resolution__fibroblast, b1000_high_resolution__endothelial, b1000_high_resolution__epithelial)
left_join(
composition_b1000_high_resolution |>
pivot_longer(-.sample, values_to = "b1000_high_resolution") |>
mutate(name = name |> str_remove("b1000_high_resolution__")) ,
composition_b1000 |>
pivot_longer(-.sample, values_to = "b1000") |>
mutate(name = name |> str_remove("b1000__"))
) |>
nest(data = -name) |>
mutate(plot = map2(
data, name,
~ .x |>
ggplot(aes(b1000_high_resolution, b1000)) +
geom_point(size=0.2) +
geom_abline(intercept = 0, slope = 1, color="red") +
ggtitle(.y) +
scale_y_continuous(trans=S_sqrt_trans(), labels = dropLeadingZero) +
scale_x_continuous(trans=S_sqrt_trans(), labels = dropLeadingZero) +
theme_multipanel
)) |>
pull(plot) |>
wrap_plots()
## Joining, by = c(".sample", "name")