These analyses highlight the global biological and technical trends within the transcriptional data. We can see tha major effect is patient source and the minor effect is treatment.
readRDS(here("results/preprocessing_results/pseudobulk_preprocessing/pseudobulk_preprocessing_sample_output.rds")) |>
left_join(readRDS(here("metadata.rds"))) |>
identify_abundant(factor_of_interest = treatment) |>
scale_abundance() |>
reduce_dimensions(method="PCA", action="get") |>
pivot_longer(c(donor,treatment)) |>
ggplot(aes(PC1, PC2, color=value)) +
geom_point() +
facet_wrap(~name) +
scale_color_brewer(palette="Set1") +
theme_bw()
## Joining, by = "sample"
## tidybulk says: the sample with largest library size untreated_17 was chosen as
## reference for scaling
## Getting the 500 most variable genes
## Fraction of variance explained by the selected principal components
## # A tibble: 2 × 2 `Fraction of variance` PC <dbl> <int> 1 0.670 1 2 0.128 2
## tidybulk says: to access the raw results do `attr(..., "internals")$PCA`
## tidySummarizedExperiment says: A data frame is returned for independent data
## analysis.
These plots highlight the cell distribution across cell clusters and samples
counts |>
DimPlot(split.by="sample", ncol=3)
This plot shows the cluste annotation for the 6 integrated samples
counts |>
DimPlot(split.by="sample", ncol=3)
These plots show gene markers for each cluster, irrespectively of treatment of patient variation. This plot is helpful to label cell clusters with cell-type identity.
markers = readRDS(here("seurat_cluster_markers.rds"))
basal_markers = c("KRT5", "TP63", "SNAI1")
luminal_markers = c("KRT8", "ELF5", "KIT", "ESR1", "PGR", "GATA3", "CSN1S1", "LALBA")
counts %>%
DoHeatmap(
features =
markers %>%
group_by(cluster) %>%
top_n(n = 30, wt = avg_log2FC) %>%
pull(gene) |>
c(basal_markers, luminal_markers),
assay = "SCT"
)
## Warning in DoHeatmap(., features = c(markers %>% group_by(cluster) %>% top_n(n
## = 30, : The following features were omitted as they were not found in the
## scale.data slot for the SCT assay: LALBA, GATA3, PGR, SNAI1, CMO305, BNIP3L
This plot shows the intensity of epithelial markler transcription across the cell distribution. This plot is helpful to label cell clusters with cell-type identity.
counts %>%
{
.x = (.)
DefaultAssay(.x) = "SCT"
.x
} |>
FeaturePlot(
features = c(basal_markers, luminal_markers),
pt.size = 0.2, sort.cell = TRUE
) &
scale_color_distiller(palette = "Spectral")
## Warning: The sort.cell parameter is being deprecated. Please use the order
## parameter instead for equivalent functionality.
## Warning: Could not find PGR in the default search locations, found in RNA assay
## instead
## Warning: Could not find LALBA in the default search locations, found in RNA
## assay instead
## Warning in FeaturePlot(counts %>% {: All cells have the same value (0) of
## rna_LALBA.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
#scale_color_gradient(low = "grey", high = "red")
This plots dshow the analysis of differential cluster composition across patients and treatment. We can see that while we can see patient biases, we cannot see significant compositional changes associated with the treatment
diff_composition = readRDS(here("sccomp_differential_cluster_composition.rds"))
plot_diff_composition_1D =
diff_composition |> plot_summary() %$% credible_intervals_1D
## Joining, by = c("sample", "seurat_clusters")
## Joining, by = c("seurat_clusters", "donor")
## Joining, by = c("sample", "seurat_clusters")
## Joining, by = c("seurat_clusters", "treatment")
plot_diff_composition_bar =
diff_composition |> unnest(count_data) |>
with_groups(sample, ~ .x |> mutate(proportion = count / sum(count))) |>
ggplot(aes(sample, proportion, fill=seurat_clusters)) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette="Set1")
The following plot represent the top differentially abundant gene-transcripts across cell clusters (see panels).
counts_pseudobulk = readRDS(here("counts_pseudobulk.rds"))
counts_pseudobulk |>
unnest(data) |>
unite("feature_cluster", c(seurat_clusters, feature), remove = FALSE) |>
filter(FDR<0.05) |>
ggplot(aes(treatment, RNA_scaled + 1)) +
geom_boxplot(aes(fill = seurat_clusters), outlier.shape = NA) +
geom_point(aes(shape = donor)) +
facet_wrap(~ feature_cluster, ncol=10) +
scale_y_log10()
The following plots, one for each cluster, show the significant differentially abundant gene-transcripts associated with treatment,
counts_pseudobulk |>
pull(plot)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]