Contamination assessment (script 08) identified clusters 10 and 11 in the CNS leukaemia object as non-leukaemia cells based on four converging lines of evidence: GFP negativity, absence of leukaemia markers (Meis1, Runx1, Cdk6, Bcl2, Mki67), expression of CNS cell type markers (Slc1a2, Sparcl1, S100b), and categorically elevated suspect gene expression sums relative to all other clusters. These clusters (n = 164 + cluster 11 cells) were excluded prior to re-running the pseudobulk differential expression analysis.
All remaining CNS leukaemia clusters (0–9) showed near-zero ambient RNA signal (mean suspect gene UMI < 0.02, median = 0), confirming that diffuse ambient contamination is negligible and does not require further correction.
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
##
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
##
## intersect, t
library(ggplot2)
library(patchwork)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:SeuratObject':
##
## intersect
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
## table, tapply, union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:sp':
##
## %over%
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:Seurat':
##
## Assays
## The following object is masked from 'package:SeuratObject':
##
## Assays
library(knitr)
library(qs2)
## qs2 0.1.7
library(ggrepel)
library(tibble)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
##
## expand
library(scales)
tissue_cols <- c("BM" = "steelblue", "CNS" = "firebrick")
source_cols <- c("AD4_EM3" = "#E69F00",
"AD2.2_EM4" = "#56B4E9",
"AD5_EM4" = "#CC79A7")
bm_res_col <- "originalexp_snn_res.0.3"
cns_res_col <- "originalexp_snn_res.0.3"
# Clusters confirmed as contaminating CNS cells
contam_clusters <- c("10", "11")
# Ambient RNA genes to flag in results
ambient_genes <- c("Slc1a2", "Sparcl1", "Cpe", "Apod",
"Mobp", "S100b", "Atp1a2","Ndrg2",
"Map2", "Gfap", "Slc1a3")
leuk_BM <- qs_read("/exports/eddie/scratch/aduguid3/harmony_clustering/04_harmony_recluster_leuk_BM_march26.qs")
leuk_CNS <- qs_read("/exports/eddie/scratch/aduguid3/harmony_clustering/04_harmony_recluster_leuk_CNS_march26.qs") # pre-cleaning
leuk_CNS_clean <- qs_read("/exports/eddie/scratch/aduguid3/harmony_clustering/04_harmony_recluster_leuk_CNS_clean_march26.qs")
stopifnot("leuk_BM not found" = exists("leuk_BM"))
stopifnot("leuk_CNS not found" = exists("leuk_CNS"))
cat("BM cells (input):", ncol(leuk_BM), "\n")
## BM cells (input): 44606
cat("CNS cells (input):", ncol(leuk_CNS), "\n")
## CNS cells (input): 47220
p_before <- DimPlot(leuk_CNS,
reduction = "umap",
group.by = cns_res_col,
label = TRUE,
label.size = 3) +
ggtitle(paste("Before exclusion —", ncol(leuk_CNS), "cells")) +
NoLegend()
p_after <- DimPlot(leuk_CNS_clean,
reduction = "umap",
group.by = cns_res_col,
label = TRUE,
label.size = 3) +
ggtitle(paste("After exclusion —", ncol(leuk_CNS_clean), "cells")) +
NoLegend()
p_before + p_after
CNS leukaemia before and after cluster exclusion
suspect_genes <- c("Slc1a2", "Sparcl1", "S100b", "Atp1a2", "Mobp")
suspect_found <- suspect_genes[suspect_genes %in% rownames(leuk_CNS_clean)]
suspect_expr <- GetAssayData(leuk_CNS_clean, layer = "data")[suspect_found, ]
leuk_CNS_clean$contam_gene_sum <- colSums(suspect_expr)
VlnPlot(leuk_CNS_clean,
features = "contam_gene_sum",
group.by = cns_res_col,
pt.size = 0,
cols = scales::hue_pal()(length(unique(
leuk_CNS_clean@meta.data[[cns_res_col]])))) +
ggtitle("Suspect gene sum after exclusion — all clusters should be flat") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1))
Suspect gene sum after cluster exclusion
leuk_all_clean <- merge(leuk_BM, leuk_CNS_clean,
add.cell.ids = c("BM", "CNS"))
cat("Combined object:\n")
## Combined object:
cat("Total cells:", ncol(leuk_all_clean), "\n")
## Total cells: 91623
print(table(leuk_all_clean$Tissue))
##
## BM CNS
## 44606 47017
print(table(leuk_all_clean$Tissue, leuk_all_clean$transplant_source))
##
## AD2.2_EM4 AD4_EM3 AD5_EM4
## BM 12162 12453 19991
## CNS 13857 17640 15520
pseudo_clean_path <- "/exports/eddie/scratch/aduguid3/harmony_clustering/pseudo_clean.qs"
file_valid <- file.exists(pseudo_clean_path) &&
tryCatch({ qs_read(pseudo_clean_path); TRUE },
error = function(e) FALSE)
if (!file_valid) {
pseudo_clean <- AggregateExpression(
leuk_all_clean,
group.by = c("Mouse_ID", "Tissue", "transplant_source"),
return.seurat = TRUE
)
qs_save(pseudo_clean, pseudo_clean_path)
} else {
pseudo_clean <- qs_read(pseudo_clean_path)
}
## Names of identity class contain underscores ('_'), replacing with dashes ('-')
## First group.by variable `Mouse_ID` starts with a number, appending `g` to ensure valid variable names
## Centering and scaling data matrix
##
## This message is displayed once every 8 hours.
counts_clean <- GetAssayData(pseudo_clean, layer = "counts")
meta_clean <- pseudo_clean@meta.data
meta_clean$Tissue <- factor(meta_clean$Tissue, levels = c("BM", "CNS"))
meta_clean$transplant_source <- factor(meta_clean$transplant_source)
meta_clean$Mouse_ID <- factor(meta_clean$Mouse_ID)
cat("Pseudobulk matrix:", nrow(counts_clean), "genes x",
ncol(counts_clean), "samples\n")
## Pseudobulk matrix: 26694 genes x 12 samples
print(table(meta_clean$Tissue, meta_clean$transplant_source))
##
## AD2.2-EM4 AD4-EM3 AD5-EM4
## BM 2 2 2
## CNS 2 2 2
dds_clean_path <- "/exports/eddie/scratch/aduguid3/harmony_clustering/dds_clean.qs"
if (!file.exists(dds_clean_path)) {
dds_clean <- DESeqDataSetFromMatrix(
countData = counts_clean,
colData = meta_clean,
design = ~ transplant_source + Tissue
)
keep <- rowSums(counts(dds_clean) >= 10) >= 3
dds_clean <- dds_clean[keep, ]
dds_clean <- DESeq(dds_clean)
qs_save(dds_clean, dds_clean_path)
} else {
dds_clean <- qs_read(dds_clean_path)
}
## converting counts to integer mode
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## estimating size factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## fitting model and testing
# These always run — fast operations
res_clean <- results(dds_clean,
contrast = c("Tissue", "CNS", "BM"),
alpha = 0.05)
cat("\n--- CNS vs BM DE Results (cleaned dataset) ---\n")
##
## --- CNS vs BM DE Results (cleaned dataset) ---
summary(res_clean)
##
## out of 16195 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 168, 1%
## LFC < 0 (down) : 170, 1%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 4)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res_clean_df <- as.data.frame(res_clean) %>%
rownames_to_column("gene") %>%
filter(!is.na(padj)) %>%
arrange(padj) %>%
mutate(ambient_flag = gene %in% ambient_genes)
cat("--- Suspect gene status after cleaning ---\n")
## --- Suspect gene status after cleaning ---
res_clean_df %>%
filter(gene %in% ambient_genes) %>%
select(gene, log2FoldChange, baseMean, padj, ambient_flag) %>%
mutate(across(where(is.numeric), ~round(., 3))) %>%
arrange(desc(log2FoldChange)) %>%
kable(caption = "Ambient RNA genes in cleaned DE results")
| gene | log2FoldChange | baseMean | padj | ambient_flag |
|---|---|---|---|---|
| Sparcl1 | 6.716 | 10.022 | 0.000 | TRUE |
| Slc1a2 | 6.647 | 9.545 | 0.000 | TRUE |
| Apod | 6.248 | 25.797 | 0.000 | TRUE |
| Cpe | 4.998 | 10.576 | 0.000 | TRUE |
| Mobp | 4.943 | 6.459 | 0.003 | TRUE |
| S100b | 4.460 | 5.747 | 0.003 | TRUE |
| Atp1a2 | 3.315 | 4.863 | 0.006 | TRUE |
| Ndrg2 | 2.957 | 15.332 | 0.000 | TRUE |
| Slc1a3 | 1.385 | 4.810 | 0.565 | TRUE |
| Gfap | 1.194 | 8.840 | 0.327 | TRUE |
# Load original results
res_orig_df <- read.csv("02_DE_CNSvsBM_all_mice_source_corrected.csv")
# Concordance
shared <- inner_join(
res_orig_df %>% select(gene, lfc_orig = log2FoldChange),
res_clean_df %>% select(gene, lfc_clean = log2FoldChange),
by = "gene"
) %>%
mutate(
suspect = gene %in% ambient_genes,
resolved = suspect & abs(lfc_clean) < abs(lfc_orig)
)
cor_val <- round(cor(shared$lfc_orig, shared$lfc_clean,
method = "pearson"), 3)
cat("Pearson correlation (pre vs post cleaning):", cor_val, "\n")
## Pearson correlation (pre vs post cleaning): 0.991
cat("Significant genes pre-cleaning (padj<0.05):",
sum(res_orig_df$padj < 0.05, na.rm = TRUE), "\n")
## Significant genes pre-cleaning (padj<0.05): 346
cat("Significant genes post-cleaning (padj<0.05):",
sum(res_clean_df$padj < 0.05, na.rm = TRUE), "\n")
## Significant genes post-cleaning (padj<0.05): 338
ggplot(shared, aes(x = lfc_orig, y = lfc_clean, colour = suspect)) +
geom_point(size = 0.5, alpha = 0.3) +
geom_point(data = filter(shared, suspect), size = 2.5, alpha = 0.9) +
ggrepel::geom_text_repel(data = filter(shared, suspect),
aes(label = gene),
size = 2.5,
max.overlaps = 20) +
scale_colour_manual(values = c("FALSE" = "grey60",
"TRUE" = "firebrick")) +
geom_abline(slope = 1, intercept = 0,
linetype = "dashed", colour = "steelblue") +
theme_classic(base_size = 12) +
labs(x = "log2FC — pre-cleaning",
y = "log2FC — post-cleaning",
title = paste0("DE concordance pre vs post cluster exclusion\n",
"Pearson r = ", cor_val),
colour = "Ambient gene") +
theme(legend.position = "bottom")
DE results before and after cluster exclusion
ambient_blacklist <- c("Slc1a2", "Sparcl1", "Apod", "Cpe",
"Mobp", "S100b", "Atp1a2","Ndrg2")
# Apply to clean DE results
res_clean_df <- res_clean_df %>%
mutate(ambient_flag = gene %in% ambient_blacklist)
# For pathway enrichment — exclude flagged genes
res_for_enrichment <- res_clean_df %>%
filter(!ambient_flag)
res_clean_df %>%
filter(padj < 0.05, log2FoldChange > 0,
!ambient_flag) %>%
head(30) %>%
select(gene, log2FoldChange, baseMean, lfcSE, padj) %>%
mutate(across(where(is.numeric), ~round(., 4))) %>%
kable(caption = "Top 30 CNS-enriched genes — cleaned dataset (padj < 0.05)")
| gene | log2FoldChange | baseMean | lfcSE | padj |
|---|---|---|---|---|
| Soat1 | 0.6900 | 2687.4740 | 0.0879 | 0e+00 |
| Gramd3 | 0.7522 | 785.7219 | 0.1005 | 0e+00 |
| Plaat3 | 0.5200 | 3933.6808 | 0.0695 | 0e+00 |
| Dnm3os | 0.6839 | 1861.8634 | 0.0940 | 0e+00 |
| Skil | 0.5328 | 856.8290 | 0.0773 | 0e+00 |
| Rnf125 | 0.2241 | 9220.7788 | 0.0335 | 0e+00 |
| Tst | 0.4258 | 555.0761 | 0.0658 | 0e+00 |
| Ubb | 0.1738 | 229500.6801 | 0.0271 | 0e+00 |
| Snn | 0.5038 | 529.5151 | 0.0836 | 0e+00 |
| Abhd8 | 0.3369 | 4517.2409 | 0.0566 | 0e+00 |
| 2010110G14Rik | 0.4061 | 2218.8930 | 0.0691 | 0e+00 |
| Gm12958 | 0.4700 | 575.9567 | 0.0806 | 0e+00 |
| Lef1os1 | 0.3475 | 4077.7245 | 0.0606 | 0e+00 |
| Tiam1 | 0.3501 | 4520.4763 | 0.0620 | 0e+00 |
| Stc1 | 0.8787 | 317.4775 | 0.1608 | 0e+00 |
| Cd96 | 0.5849 | 1682.5851 | 0.1078 | 0e+00 |
| Fgd1 | 0.7965 | 327.1907 | 0.1493 | 0e+00 |
| Septin5 | 0.6392 | 226.4566 | 0.1213 | 0e+00 |
| Cdr1os | 6.2049 | 11.0269 | 1.1776 | 0e+00 |
| Lef1 | 0.3490 | 76435.6625 | 0.0665 | 0e+00 |
| Pgghg | 0.2682 | 738.3383 | 0.0518 | 1e-04 |
| Egfem1 | 0.8184 | 987.9560 | 0.1597 | 1e-04 |
| Jak1 | 0.1906 | 28213.1886 | 0.0380 | 1e-04 |
| Cmtm3 | 0.3654 | 337.3029 | 0.0734 | 2e-04 |
| Nfkbie | 0.4794 | 1633.4494 | 0.0988 | 3e-04 |
| Tor3a | 0.3259 | 480.9925 | 0.0682 | 4e-04 |
| Greb1 | 0.7967 | 415.6434 | 0.1666 | 4e-04 |
| Sla2 | 0.2994 | 4458.1527 | 0.0628 | 4e-04 |
| Amz2 | 0.1891 | 5936.1767 | 0.0397 | 4e-04 |
| Gpr83 | 0.8085 | 1918.6277 | 0.1708 | 5e-04 |
res_clean_df %>%
filter(padj < 0.05, log2FoldChange < 0,
!ambient_flag) %>%
arrange(log2FoldChange) %>%
head(30) %>%
select(gene, log2FoldChange, baseMean, lfcSE, padj) %>%
mutate(across(where(is.numeric), ~round(., 4))) %>%
kable(caption = "Top 30 BM-enriched genes — cleaned dataset (padj < 0.05)")
| gene | log2FoldChange | baseMean | lfcSE | padj |
|---|---|---|---|---|
| Ighv1-55 | -6.6472 | 156.2144 | 1.2691 | 0.0000 |
| Igkv8-27 | -6.3627 | 309.3203 | 1.1093 | 0.0000 |
| Igkv10-96 | -4.9884 | 58.2785 | 1.3255 | 0.0142 |
| Igkv6-23 | -4.4524 | 58.5085 | 1.3419 | 0.0452 |
| Iglc1 | -4.3387 | 12.2966 | 0.9317 | 0.0007 |
| Iglv1 | -4.2280 | 77.8822 | 1.0427 | 0.0058 |
| Gm2682 | -4.2260 | 44.9843 | 1.1035 | 0.0119 |
| Gm57200 | -3.6044 | 9.9410 | 1.0016 | 0.0220 |
| Lpl | -3.5327 | 10.2122 | 0.6106 | 0.0000 |
| Cd5l | -3.4785 | 24.8795 | 0.6469 | 0.0000 |
| Clec7a | -3.3886 | 21.8621 | 0.5261 | 0.0000 |
| Jchain | -3.1459 | 254.8241 | 0.6442 | 0.0003 |
| Igha | -3.0918 | 292.1313 | 0.6310 | 0.0002 |
| Spic | -2.9823 | 12.6075 | 0.6281 | 0.0005 |
| Atp6v0d2 | -2.8764 | 20.3445 | 0.4227 | 0.0000 |
| Mgst1 | -2.8658 | 7.7314 | 0.6057 | 0.0005 |
| Gm30211 | -2.8449 | 38.8918 | 0.5129 | 0.0000 |
| Igkc | -2.8245 | 186.8446 | 0.5675 | 0.0002 |
| Epb41l3 | -2.8117 | 17.9392 | 0.4122 | 0.0000 |
| Tgm2 | -2.7874 | 6.4121 | 0.6300 | 0.0016 |
| Fcna | -2.6913 | 15.3077 | 0.4695 | 0.0000 |
| Pparg | -2.6005 | 6.8941 | 0.7700 | 0.0393 |
| Iglc2 | -2.5899 | 8.2540 | 0.6778 | 0.0120 |
| Il6ra | -2.5841 | 6.5994 | 0.7705 | 0.0419 |
| Vcam1 | -2.5787 | 26.3177 | 0.3959 | 0.0000 |
| Slc40a1 | -2.5401 | 13.8461 | 0.4579 | 0.0000 |
| Hebp1 | -2.4878 | 9.5473 | 0.5872 | 0.0031 |
| Tnfaip2 | -2.4509 | 15.3431 | 0.5626 | 0.0021 |
| Il18 | -2.4213 | 11.0189 | 0.4332 | 0.0000 |
| Hpgd | -2.4197 | 9.1420 | 0.6143 | 0.0083 |
res_clean_df %>%
filter(!ambient_flag) %>%
mutate(
direction = case_when(
log2FoldChange > 0.5 & padj < 0.05 ~ "CNS-enriched",
log2FoldChange < -0.5 & padj < 0.05 ~ "BM-enriched",
TRUE ~ "NS"
),
label = ifelse(abs(log2FoldChange) > 1 & padj < 0.01 &
!ambient_flag, gene, NA)
) %>%
ggplot(aes(x = log2FoldChange, y = -log10(padj),
colour = direction)) +
geom_point(size = 0.6, alpha = 0.6) +
geom_text_repel(aes(label = label), size = 2.5, max.overlaps = 20) +
scale_colour_manual(
values = c("CNS-enriched" = "firebrick",
"BM-enriched" = "steelblue",
"NS" = "grey70")) +
geom_vline(xintercept = c(-0.5, 0.5),
linetype = "dashed", colour = "grey40") +
geom_hline(yintercept = -log10(0.05),
linetype = "dashed", colour = "grey40") +
theme_classic(base_size = 12) +
labs(x = "log2FC (CNS vs BM)",
y = "-log10 adjusted p-value",
title = "CNS vs BM — cleaned dataset\n(ambient RNA clusters excluded)",
colour = NULL) +
theme(legend.position = "bottom")
## Warning: Removed 16144 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
Volcano plot — CNS vs BM, cleaned dataset
vsd_clean <- vst(dds_clean, blind = FALSE)
pca_data <- plotPCA(vsd_clean,
intgroup = c("Tissue", "transplant_source"),
returnData = TRUE)
## using ntop=500 top features by variance
pct_var <- round(100 * attr(pca_data, "percentVar"), 1)
ggplot(pca_data,
aes(x = PC1, y = PC2,
colour = transplant_source,
shape = Tissue,
label = name)) +
geom_point(size = 4) +
geom_text_repel(size = 2.8) +
scale_colour_manual(values = source_cols) +
scale_shape_manual(values = c("BM" = 16, "CNS" = 17)) +
theme_classic(base_size = 12) +
labs(x = paste0("PC1: ", pct_var[1], "% variance"),
y = paste0("PC2: ", pct_var[2], "% variance"),
title = "Pseudobulk PCA — cleaned dataset",
colour = "Transplant source",
shape = "Tissue")
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
## No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
Pseudobulk PCA after cluster exclusion
# Save DE results — full and filtered
write.csv(res_clean_df,
"04_DE_CNSvsBM_clean.csv",
row.names = FALSE)
# Save CNS signature (clean, no ambient genes)
res_clean_df %>%
filter(padj < 0.05, log2FoldChange > 0.5, !ambient_flag) %>%
write.csv("04_CNS_signature_clean.csv", row.names = FALSE)
# Save BM signature (clean)
res_clean_df %>%
filter(padj < 0.05, log2FoldChange < -0.5, !ambient_flag) %>%
write.csv("04_BM_signature_clean.csv", row.names = FALSE)
cat("Saved:\n")
## Saved:
cat("04_leuk_CNS_clean.qs —", ncol(leuk_CNS_clean), "cells\n")
## 04_leuk_CNS_clean.qs — 47017 cells
cat("04_DE_CNSvsBM_clean.csv\n")
## 04_DE_CNSvsBM_clean.csv
cat("04_CNS_signature_clean.csv\n")
## 04_CNS_signature_clean.csv
cat("04_BM_signature_clean.csv\n")
## 04_BM_signature_clean.csv
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Rocky Linux 9.5 (Blue Onyx)
##
## Matrix products: default
## BLAS/LAPACK: /opt/intel/oneapi/mkl/2024.0/lib/libmkl_gf_lp64.so.2; LAPACK version 3.10.1
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/London
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] scales_1.4.0 tidyr_1.3.1
## [3] tibble_3.3.1 ggrepel_0.9.6
## [5] qs2_0.1.7 knitr_1.51
## [7] DESeq2_1.46.0 SummarizedExperiment_1.36.0
## [9] Biobase_2.66.0 MatrixGenerics_1.18.1
## [11] matrixStats_1.5.0 GenomicRanges_1.58.0
## [13] GenomeInfoDb_1.42.3 IRanges_2.40.1
## [15] S4Vectors_0.44.0 BiocGenerics_0.52.0
## [17] dplyr_1.1.4 patchwork_1.3.2
## [19] ggplot2_4.0.2 Seurat_5.4.0
## [21] SeuratObject_5.3.0 sp_2.1-4
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.16.0 jsonlite_2.0.0
## [4] magrittr_2.0.3 ggbeeswarm_0.7.3 spatstat.utils_3.2-1
## [7] farver_2.1.2 rmarkdown_2.30 zlibbioc_1.52.0
## [10] vctrs_0.6.5 ROCR_1.0-12 spatstat.explore_3.7-0
## [13] S4Arrays_1.6.0 htmltools_0.5.8.1 SparseArray_1.6.2
## [16] sass_0.4.9 sctransform_0.4.3 parallelly_1.37.1
## [19] KernSmooth_2.23-24 bslib_0.7.0 htmlwidgets_1.6.4
## [22] ica_1.0-3 plyr_1.8.9 plotly_4.12.0
## [25] zoo_1.8-15 cachem_1.1.0 igraph_2.2.2
## [28] mime_0.12 lifecycle_1.0.5 pkgconfig_2.0.3
## [31] Matrix_1.7-0 R6_2.6.1 fastmap_1.2.0
## [34] GenomeInfoDbData_1.2.13 fitdistrplus_1.2-6 future_1.33.2
## [37] shiny_1.12.1 digest_0.6.35 tensor_1.5.1
## [40] RSpectra_0.16-2 irlba_2.3.7 labeling_0.4.3
## [43] progressr_0.18.0 fansi_1.0.6 spatstat.sparse_3.1-0
## [46] httr_1.4.7 polyclip_1.10-7 abind_1.4-5
## [49] compiler_4.4.1 withr_3.0.2 S7_0.2.1
## [52] BiocParallel_1.40.2 fastDummies_1.7.5 MASS_7.3-60.2
## [55] DelayedArray_0.32.0 tools_4.4.1 vipor_0.4.7
## [58] lmtest_0.9-40 otel_0.2.0 beeswarm_0.4.0
## [61] httpuv_1.6.16 future.apply_1.11.2 goftest_1.2-3
## [64] glue_1.8.0 nlme_3.1-164 promises_1.5.0
## [67] grid_4.4.1 Rtsne_0.17 cluster_2.1.6
## [70] reshape2_1.4.4 generics_0.1.3 gtable_0.3.6
## [73] spatstat.data_3.1-9 data.table_1.15.4 stringfish_0.18.0
## [76] utf8_1.2.4 XVector_0.46.0 spatstat.geom_3.7-0
## [79] RcppAnnoy_0.0.23 RANN_2.6.2 pillar_1.9.0
## [82] stringr_1.5.1 spam_2.11-3 RcppHNSW_0.6.0
## [85] later_1.4.6 splines_4.4.1 lattice_0.22-6
## [88] survival_3.6-4 deldir_2.0-4 tidyselect_1.2.1
## [91] locfit_1.5-9.12 miniUI_0.1.2 pbapply_1.7-4
## [94] gridExtra_2.3 scattermore_1.2 xfun_0.56
## [97] stringi_1.8.4 UCSC.utils_1.2.0 lazyeval_0.2.2
## [100] yaml_2.3.12 evaluate_1.0.5 codetools_0.2-20
## [103] cli_3.6.5 RcppParallel_5.1.8 uwot_0.2.4
## [106] xtable_1.8-4 reticulate_1.45.0 jquerylib_0.1.4
## [109] dichromat_2.0-0.1 Rcpp_1.0.12 globals_0.16.3
## [112] spatstat.random_3.4-4 png_0.1-8 ggrastr_1.0.2
## [115] spatstat.univar_3.1-6 parallel_4.4.1 dotCall64_1.2
## [118] listenv_0.9.1 viridisLite_0.4.2 ggridges_0.5.6
## [121] crayon_1.5.2 purrr_1.0.2 rlang_1.1.7
## [124] cowplot_1.2.0