Overview

Building on 13b subclustering results, this script:

  1. Annotates stroma subclusters — fibroblast subtypes, pericytes, endothelial, choroid plexus; flags and removes non-stromal contaminants (megakaryocytes, granulocytes, immune cells)
  2. Annotates myeloid subclusters — homeostatic microglia, BAMs, monocyte-derived, TREM2/LAM, perivascular, proliferating, IFN-responsive
  3. Tissue-matched DE — compares equivalent populations across CNS vs BM to identify niche-specific programmes
  4. Macrophage subtype analysis — M1/M2/LAM scoring and tissue distribution

Setup

# Run once if not installed:
# install.packages("gprofiler2")
library(Seurat)
library(qs2)
library(dplyr)
library(tidyr)
library(ggplot2)
library(patchwork)
library(ComplexHeatmap)
library(circlize)
library(knitr)
library(UCell)
library(gprofiler2)
library(ggrepel)
library(RColorBrewer)

base_path <- "/exports/eddie/scratch/aduguid3"
cache_dir <- file.path(base_path, "harmony_clustering")
stroma  <- qs_read(file.path(cache_dir, "13b_stroma_subclustered.qs"))
myeloid <- qs_read(file.path(cache_dir, "13b_myeloid_subclustered.qs"))

DefaultAssay(stroma)  <- "originalexp"
DefaultAssay(myeloid) <- "originalexp"

cat("Stroma cells:", ncol(stroma), "\n")
## Stroma cells: 1442
print(table(stroma$seurat_clusters, stroma$Tissue))
##    
##      BM CNS
##   0   0 294
##   1 277   7
##   2   0 275
##   3 159   2
##   4   0 122
##   5 118   4
##   6 111   0
##   7  54   1
##   8   0  18
cat("\nMyeloid cells:", ncol(myeloid), "\n")
## 
## Myeloid cells: 3507
print(table(myeloid$seurat_clusters, myeloid$Tissue))
##     
##        BM  CNS
##   0     0 1169
##   1   499   83
##   2    21  544
##   3   406   27
##   4   120   82
##   5     5  123
##   6     0  115
##   7     0  108
##   8     0   73
##   9    60    3
##   10    0   43
##   11   16   10
# ── First check what clusters actually exist ──
cat("Cluster levels:\n")
## Cluster levels:
print(levels(stroma$seurat_clusters))
## [1] "0" "1" "2" "3" "4" "5" "6" "7" "8"
cat("\nUnique values:\n")
## 
## Unique values:
print(sort(unique(as.character(stroma$seurat_clusters))))
## [1] "0" "1" "2" "3" "4" "5" "6" "7" "8"
# ── Build annotation safely ──
stroma$subtype <- dplyr::recode(as.character(stroma$seurat_clusters),
  "0" = "CNS Leptomeningeal Fibroblasts",
  "1" = "BM Osteoblastic Stroma",
  "2" = "CNS Meningeal Fibroblasts (ECM)",
  "3" = "BM Mesenchymal Stroma",
  "4" = "CNS Mesenchymal/Adventitial",
  "5" = "CONTAMINANT: Megakaryocytes",
  "6" = "CONTAMINANT: Granulocytes",
  "7" = "CONTAMINANT: Megakaryocyte/Platelet",
  "8" = "Choroid Plexus Epithelium",
  .default = "Unassigned"
)

stroma$is_contaminant <- grepl("CONTAMINANT", stroma$subtype)

1. Stroma Subcluster Annotation

1a — Annotation Evidence

Based on 13b marker analysis:

  • Cluster 0 (294 cells, all CNS): Ptgds, Penk, Igfbp2 → CNS leptomeningeal fibroblasts
  • Cluster 1 (284 cells, ~all BM): Ibsp, Ebf3, Cpa6, Crabp2 → BM osteoblastic stroma
  • Cluster 2 (275 cells, all CNS): Epha3, Mmp3, Itga8, Cemip → CNS meningeal fibroblasts (ECM-remodelling)
  • Cluster 3 (161 cells, ~all BM): Check below — may be mesenchymal or contaminant
  • Cluster 4 (122 cells, all CNS): Overlap with C2 but distinct → CNS mesenchymal/adventitial
  • Cluster 5 (122 cells, ~all BM): Gp9, Gp1ba, Treml1 → CONTAMINANT Megakaryocytes
  • Cluster 6 (111 cells, all BM): Clec4e, Cxcr2, Mpo, Csf3r → CONTAMINANT Granulocytes
  • Cluster 7 (55 cells, ~all BM): Parvb, Gp1ba, Treml1, F2rl2 → CONTAMINANT Megakaryocyte/platelet
  • Cluster 8 (18 cells, all CNS): Ttr, Foxj1, Otx2, Kl → Choroid plexus epithelium
annotation_genes <- c(
  # Fibroblast core
  "Col1a1", "Col1a2", "Dcn", "Pdgfra",
  # Leptomeningeal
  "Ptgds", "Penk", "Igfbp2", "Slc47a1",
  # Osteoblastic
  "Ibsp", "Bglap", "Sp7", "Runx2", "Ebf3",
  # ECM remodelling
  "Mmp3", "Mmp2", "Cemip", "Epha3",
  # Pericyte
  "Pdgfrb", "Rgs5", "Acta2", "Notch3",
  # Endothelial
  "Pecam1", "Cdh5", "Cldn5",
  # Choroid plexus
  "Ttr", "Foxj1", "Otx2", "Kl",
  # Megakaryocyte (contaminant)
  "Gp9", "Gp1ba", "Pf4", "Itga2b",
  # Granulocyte (contaminant)
  "Mpo", "Csf3r", "Cxcr2", "S100a8",
  # Immune (contaminant)
  "Siglech", "Rag2", "Klrd1", "Cd3e"
)
present <- annotation_genes[annotation_genes %in% rownames(stroma)]

DotPlot(stroma, features = present,
        cols = c("lightgrey", "darkred"), dot.scale = 5) +
  RotatedAxis() +
  ggtitle("Stroma Subcluster Annotation — Diagnostic Panel") +
  theme(axis.text.x = element_text(face = "italic", size = 7))

1b — Check Cluster 3 Identity

Cluster 3 had Siglech/Rag2 in the 13b heatmap — check if truly stromal or immune.

c3_diag <- c("Col1a1", "Pdgfra", "Pdgfrb", "Dcn",
             "Siglech", "Rag2", "Klrd1", "Cd3e",
             "Chrm3", "Nlgn1", "Kcnb2",
             "Gm34680", "Kctd8", "Rtl4")
c3_present <- c3_diag[c3_diag %in% rownames(stroma)]

VlnPlot(stroma, features = c3_present[1:min(8, length(c3_present))],
        group.by = "seurat_clusters", pt.size = 0, ncol = 4) &
  theme(axis.text.x = element_text(size = 9))

## 1c — Apply Annotations & Flag Contaminants

# ── ANNOTATION MAP ──
# Clusters 5, 6, 7 are contaminants; cluster 3 — UPDATE based on 1b results

stroma_labels <- c(
  "0" = "CNS Leptomeningeal Fibroblasts",
  "1" = "BM Osteoblastic Stroma",
  "2" = "CNS Meningeal Fibroblasts (ECM)",
  "3" = "CONTAMINANT: pDC/NK-like",          
  "4" = "CNS Mesenchymal/Adventitial",
  "5" = "CONTAMINANT: Megakaryocytes",
  "6" = "CONTAMINANT: Granulocytes",
  "7" = "CONTAMINANT: Megakaryocyte/Platelet",
  "8" = "Choroid Plexus Epithelium"
)

stroma$is_contaminant <- grepl("CONTAMINANT", stroma$subtype)


cat("Stroma annotation summary:\n")
## Stroma annotation summary:
stroma@meta.data %>%
  count(seurat_clusters, subtype, Tissue, is_contaminant) %>%
  arrange(seurat_clusters) %>%
  kable()
seurat_clusters subtype Tissue is_contaminant n
0 CNS Leptomeningeal Fibroblasts CNS FALSE 294
1 BM Osteoblastic Stroma BM FALSE 277
1 BM Osteoblastic Stroma CNS FALSE 7
2 CNS Meningeal Fibroblasts (ECM) CNS FALSE 275
3 BM Mesenchymal Stroma BM FALSE 159
3 BM Mesenchymal Stroma CNS FALSE 2
4 CNS Mesenchymal/Adventitial CNS FALSE 122
5 CONTAMINANT: Megakaryocytes BM TRUE 118
5 CONTAMINANT: Megakaryocytes CNS TRUE 4
6 CONTAMINANT: Granulocytes BM TRUE 111
7 CONTAMINANT: Megakaryocyte/Platelet BM TRUE 54
7 CONTAMINANT: Megakaryocyte/Platelet CNS TRUE 1
8 Choroid Plexus Epithelium CNS FALSE 18
n_real <- sum(!grepl("CONTAMINANT", stroma_labels))
real_cols <- brewer.pal(min(n_real, 9), "Set1")
if (n_real > 9) real_cols <- c(real_cols, brewer.pal(n_real - 9, "Set2"))

subtype_cols <- setNames(
  c(real_cols[1:n_real],
    rep("grey70", sum(grepl("CONTAMINANT", stroma_labels)))),
  c(stroma_labels[!grepl("CONTAMINANT", stroma_labels)],
    stroma_labels[grepl("CONTAMINANT", stroma_labels)])
)

p1 <- DimPlot(stroma, group.by = "subtype", label = TRUE, label.size = 3,
              repel = TRUE, cols = subtype_cols) +
  ggtitle("Stroma — Annotated Subtypes") +
  theme(legend.text = element_text(size = 7))

p2 <- DimPlot(stroma, group.by = "Tissue",
              cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
  ggtitle("By Tissue")

p1 + p2

1d — Remove Contaminants

stroma_clean <- subset(stroma, is_contaminant == FALSE)
cat("Stroma after contaminant removal:", ncol(stroma_clean),
    "(removed", ncol(stroma) - ncol(stroma_clean), "cells)\n\n")
## Stroma after contaminant removal: 1154 (removed 288 cells)
print(table(stroma_clean$subtype, stroma_clean$Tissue))
##                                  
##                                    BM CNS
##   BM Mesenchymal Stroma           159   2
##   BM Osteoblastic Stroma          277   7
##   Choroid Plexus Epithelium         0  18
##   CNS Leptomeningeal Fibroblasts    0 294
##   CNS Meningeal Fibroblasts (ECM)   0 275
##   CNS Mesenchymal/Adventitial       0 122
p1 <- DimPlot(stroma_clean, group.by = "subtype", label = TRUE, label.size = 3,
              repel = TRUE) +
  ggtitle("Clean Stroma — Subtypes")

p2 <- DimPlot(stroma_clean, group.by = "Tissue",
              cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
  ggtitle("By Tissue")

p1 + p2


2. Stroma Tissue-Matched DE

2a — Identify Cross-Tissue Populations

cross_tissue <- stroma_clean@meta.data %>%
  count(subtype, Tissue) %>%
  pivot_wider(names_from = Tissue, values_from = n, values_fill = 0) %>%
  mutate(
    shared = CNS > 5 & BM > 5,
    dominant_tissue = case_when(
      CNS > BM * 5 ~ "CNS-dominant",
      BM > CNS * 5 ~ "BM-dominant",
      TRUE ~ "Shared"
    )
  )

cat("Cross-tissue distribution of stroma subtypes:\n")
## Cross-tissue distribution of stroma subtypes:
print(cross_tissue)
## # A tibble: 6 × 5
##   subtype                            BM   CNS shared dominant_tissue
##   <chr>                           <int> <int> <lgl>  <chr>          
## 1 BM Mesenchymal Stroma             159     2 FALSE  BM-dominant    
## 2 BM Osteoblastic Stroma            277     7 TRUE   BM-dominant    
## 3 CNS Leptomeningeal Fibroblasts      0   294 FALSE  CNS-dominant   
## 4 CNS Meningeal Fibroblasts (ECM)     0   275 FALSE  CNS-dominant   
## 5 CNS Mesenchymal/Adventitial         0   122 FALSE  CNS-dominant   
## 6 Choroid Plexus Epithelium           0    18 FALSE  CNS-dominant

2b — Broad Fibroblast Comparison: All CNS vs All BM Stroma

Compares the overall niche programme between tissues on cleaned data.

Idents(stroma_clean) <- "Tissue"

de_stroma_clean <- FindMarkers(stroma_clean,
                                ident.1 = "CNS", ident.2 = "BM",
                                test.use = "wilcox",
                                min.pct = 0.1, logfc.threshold = 0)
de_stroma_clean$gene <- rownames(de_stroma_clean)

cat("Clean stroma CNS vs BM DE — significant genes:", sum(de_stroma_clean$p_val_adj < 0.05), "\n")
## Clean stroma CNS vs BM DE — significant genes: 4149
cat("  CNS-enriched:", sum(de_stroma_clean$avg_log2FC > 0.5 & de_stroma_clean$p_val_adj < 0.05), "\n")
##   CNS-enriched: 1964
cat("  BM-enriched:", sum(de_stroma_clean$avg_log2FC < -0.5 & de_stroma_clean$p_val_adj < 0.05), "\n")
##   BM-enriched: 1920
niche_genes <- c("Cxcl12", "Cxcl16", "Ptn", "Mdk", "Kitl", "Fn1",
                  "Col1a1", "Col3a1", "Igf1", "Tgfb1", "Tgfb2",
                  "Il33", "Wnt5a", "App", "Apoe", "Ptgds",
                  "Pdgfra", "Pdgfrb", "Mmp2", "Mmp14", "Postn",
                  "Ibsp", "Ebf3", "Crabp2", "Penk")

de_stroma_plot <- de_stroma_clean %>%
  mutate(
    sig = case_when(
      p_val_adj < 0.05 & avg_log2FC > 0.5  ~ "CNS-enriched",
      p_val_adj < 0.05 & avg_log2FC < -0.5 ~ "BM-enriched",
      TRUE ~ "NS"
    ),
    label = ifelse(gene %in% niche_genes |
                     (p_val_adj < 1e-20 & abs(avg_log2FC) > 2),
                   gene, "")
  )

ggplot(de_stroma_plot, aes(x = avg_log2FC, y = -log10(p_val_adj),
                            colour = sig, label = label)) +
  geom_point(size = 0.8, alpha = 0.5) +
  geom_text_repel(size = 3, max.overlaps = 30,
                   fontface = "italic", segment.size = 0.2) +
  scale_colour_manual(values = c("CNS-enriched" = "#B2182B",
                                  "BM-enriched" = "#2166AC", "NS" = "grey80")) +
  geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", colour = "grey60") +
  theme_minimal(base_size = 12) +
  labs(title = "Stroma CNS vs BM (Contaminants Removed)",
       x = "log2FC (+ = CNS)", y = "-log10(padj)")

2c — Subtype-Specific DE (If Shared Populations Exist)

shared_subtypes <- cross_tissue %>% filter(shared) %>% pull(subtype)
de_stroma_subtype <- list()

if (length(shared_subtypes) > 0) {
  for (st in shared_subtypes) {
    cat("\n### ", st, " — CNS vs BM\n\n")
    sub <- subset(stroma_clean, subtype == st)
    if (min(table(sub$Tissue)) < 10) { cat("Skipped: too few cells\n"); next }
    
    Idents(sub) <- "Tissue"
    de <- FindMarkers(sub, ident.1 = "CNS", ident.2 = "BM",
                      test.use = "wilcox", min.pct = 0.1, logfc.threshold = 0)
    de$gene <- rownames(de)
    de_stroma_subtype[[st]] <- de
    
    cat("Significant DE genes:", sum(de$p_val_adj < 0.05), "\n")
    cat("CNS-enriched:", sum(de$avg_log2FC > 0.5 & de$p_val_adj < 0.05), "\n")
    cat("BM-enriched:", sum(de$avg_log2FC < -0.5 & de$p_val_adj < 0.05), "\n\n")
    
    cat("**Top CNS-enriched:**\n\n")
    de %>% filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
      arrange(desc(avg_log2FC)) %>% head(15) %>%
      select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
      kable(digits = c(NA, 2, 2, 2, 3)) %>% print()
    
    cat("\n**Top BM-enriched:**\n\n")
    de %>% filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
      arrange(avg_log2FC) %>% head(15) %>%
      select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
      kable(digits = c(NA, 2, 2, 2, 3)) %>% print()
  }
} else {
  cat("No stroma subtypes span both tissues with sufficient cells.\n")
  cat("CNS and BM stroma are transcriptionally distinct — tissue-level DE is appropriate.\n")
}

BM Osteoblastic Stroma — CNS vs BM

Skipped: too few cells

2d — CNS-Specific Subtype Comparison

cns_stroma <- subset(stroma_clean, Tissue == "CNS")
cns_subtypes <- names(which(table(cns_stroma$subtype) >= 10))
cat("CNS stroma subtypes with >=10 cells:", paste(cns_subtypes, collapse = "\n  "), "\n\n")
## CNS stroma subtypes with >=10 cells: Choroid Plexus Epithelium
##   CNS Leptomeningeal Fibroblasts
##   CNS Meningeal Fibroblasts (ECM)
##   CNS Mesenchymal/Adventitial
Idents(cns_stroma) <- "subtype"

if (length(cns_subtypes) >= 2) {
  de_cns_subtypes <- FindAllMarkers(cns_stroma, only.pos = TRUE,
                                     min.pct = 0.2, logfc.threshold = 0.5,
                                     verbose = FALSE)
  cat("DE genes distinguishing CNS stroma subtypes:", nrow(de_cns_subtypes), "\n\n")
  
  de_cns_subtypes %>%
    group_by(cluster) %>%
    slice_max(avg_log2FC, n = 15) %>%
    select(cluster, gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
    kable(digits = c(NA, NA, 2, 2, 2, 3),
          caption = "Top 15 markers per CNS stroma subtype")
}
## DE genes distinguishing CNS stroma subtypes: 7650
Top 15 markers per CNS stroma subtype
cluster gene avg_log2FC pct.1 pct.2 p_val_adj
CNS Leptomeningeal Fibroblasts Ptgds 2.67 0.34 0.21 0.048
CNS Leptomeningeal Fibroblasts Penk 1.93 0.65 0.50 0.000
CNS Leptomeningeal Fibroblasts Igfbp2 1.29 0.45 0.39 1.000
CNS Leptomeningeal Fibroblasts Nme2 1.28 0.98 0.96 0.000
CNS Leptomeningeal Fibroblasts Asgr1 1.25 0.29 0.23 1.000
CNS Leptomeningeal Fibroblasts S100b 1.24 0.51 0.52 1.000
CNS Leptomeningeal Fibroblasts Gm19951 1.20 0.82 0.80 0.000
CNS Leptomeningeal Fibroblasts Cyb5r3 1.19 0.63 0.65 0.025
CNS Leptomeningeal Fibroblasts Rps8 1.19 0.99 1.00 0.000
CNS Leptomeningeal Fibroblasts Cmss1 1.14 0.98 0.99 0.000
CNS Leptomeningeal Fibroblasts Cdk8 1.12 0.96 0.97 0.000
CNS Leptomeningeal Fibroblasts Rps24 1.12 1.00 0.99 0.000
CNS Leptomeningeal Fibroblasts Cd74 1.11 0.68 0.58 0.001
CNS Leptomeningeal Fibroblasts Reg3g 1.11 0.28 0.21 1.000
CNS Leptomeningeal Fibroblasts Fth1 1.10 1.00 1.00 0.000
Choroid Plexus Epithelium 2900040C04Rik 13.74 0.83 0.00 0.000
Choroid Plexus Epithelium Ttr 12.78 0.89 0.04 0.000
Choroid Plexus Epithelium Pcp4l1 12.62 0.83 0.00 0.000
Choroid Plexus Epithelium Prr32 11.85 0.83 0.00 0.000
Choroid Plexus Epithelium Ppp1r1b 11.85 0.94 0.00 0.000
Choroid Plexus Epithelium Vat1l 11.62 0.78 0.00 0.000
Choroid Plexus Epithelium Otx2 11.51 0.83 0.00 0.000
Choroid Plexus Epithelium Kl 11.50 0.72 0.00 0.000
Choroid Plexus Epithelium Foxj1 10.86 0.67 0.00 0.000
Choroid Plexus Epithelium Maob 10.84 0.83 0.00 0.000
Choroid Plexus Epithelium Folr1 10.73 0.56 0.00 0.000
Choroid Plexus Epithelium Krt18 10.62 0.89 0.00 0.000
Choroid Plexus Epithelium Spag6l 10.57 0.72 0.00 0.000
Choroid Plexus Epithelium Tmem72 10.57 0.67 0.00 0.000
Choroid Plexus Epithelium Drc7 10.54 0.78 0.00 0.000
CNS Meningeal Fibroblasts (ECM) Chil1 3.31 0.35 0.08 0.000
CNS Meningeal Fibroblasts (ECM) Zfp385b 3.10 0.21 0.02 0.000
CNS Meningeal Fibroblasts (ECM) Rrad 3.06 0.23 0.04 0.000
CNS Meningeal Fibroblasts (ECM) Kcnab1 2.97 0.20 0.03 0.000
CNS Meningeal Fibroblasts (ECM) Gm31641 2.69 0.32 0.06 0.000
CNS Meningeal Fibroblasts (ECM) Il1r1 2.60 0.80 0.21 0.000
CNS Meningeal Fibroblasts (ECM) Ptgfr 2.57 0.23 0.03 0.000
CNS Meningeal Fibroblasts (ECM) Epha3 2.52 0.79 0.23 0.000
CNS Meningeal Fibroblasts (ECM) Il15ra 2.52 0.23 0.04 0.000
CNS Meningeal Fibroblasts (ECM) 4930578G10Rik 2.50 0.43 0.06 0.000
CNS Meningeal Fibroblasts (ECM) Itga8 2.44 0.32 0.06 0.000
CNS Meningeal Fibroblasts (ECM) Mmp3 2.39 0.33 0.10 0.000
CNS Meningeal Fibroblasts (ECM) Cemip 2.38 0.75 0.22 0.000
CNS Meningeal Fibroblasts (ECM) Rhobtb3 2.38 0.22 0.04 0.000
CNS Meningeal Fibroblasts (ECM) Nox4 2.33 0.58 0.08 0.000
CNS Mesenchymal/Adventitial Gm26740 3.68 0.21 0.02 0.000
CNS Mesenchymal/Adventitial Fam227b 3.44 0.20 0.03 0.000
CNS Mesenchymal/Adventitial Nlgn1 2.98 0.39 0.10 0.000
CNS Mesenchymal/Adventitial Chrm3 2.97 0.75 0.20 0.000
CNS Mesenchymal/Adventitial D630045J12Rik 2.65 0.30 0.09 0.000
CNS Mesenchymal/Adventitial Unc5c 2.58 0.58 0.17 0.000
CNS Mesenchymal/Adventitial Rtl4 2.56 0.35 0.10 0.000
CNS Mesenchymal/Adventitial Lrrc38 2.55 0.22 0.05 0.000
CNS Mesenchymal/Adventitial A330076H08Rik 2.54 0.26 0.10 0.001
CNS Mesenchymal/Adventitial Nalcn 2.52 0.39 0.07 0.000
CNS Mesenchymal/Adventitial Kcnb2 2.41 0.72 0.22 0.000
CNS Mesenchymal/Adventitial Grip1 2.38 0.44 0.12 0.000
CNS Mesenchymal/Adventitial A230057D06Rik 2.32 0.42 0.11 0.000
CNS Mesenchymal/Adventitial Kctd8 2.29 0.51 0.21 0.000
CNS Mesenchymal/Adventitial Znrf3 2.27 0.23 0.09 0.066
BM Osteoblastic Stroma Adipoq 10.80 0.43 0.00 0.000
BM Osteoblastic Stroma Csrnp3 10.54 0.29 0.00 0.000
BM Osteoblastic Stroma Grem1 10.15 0.43 0.00 0.000
BM Osteoblastic Stroma Meis2 9.77 0.43 0.01 0.000
BM Osteoblastic Stroma H2-Q10 9.68 0.43 0.01 0.000
BM Osteoblastic Stroma Negr1 9.54 0.43 0.00 0.000
BM Osteoblastic Stroma Ebf3 9.47 0.29 0.00 0.000
BM Osteoblastic Stroma Arhgef38 9.44 0.29 0.00 0.000
BM Osteoblastic Stroma Gm14636 9.31 0.43 0.02 0.000
BM Osteoblastic Stroma Fosl1 9.24 0.43 0.01 0.000
BM Osteoblastic Stroma Inhbb 8.98 0.43 0.00 0.000
BM Osteoblastic Stroma Hp 8.88 0.71 0.09 0.000
BM Osteoblastic Stroma F730043M19Rik 8.63 0.43 0.00 0.000
BM Osteoblastic Stroma Lpl 8.60 0.71 0.00 0.000
BM Osteoblastic Stroma Slc10a6 8.49 0.29 0.00 0.000
if (exists("de_cns_subtypes") && nrow(de_cns_subtypes) > 0) {
  top_cns <- de_cns_subtypes %>%
    group_by(cluster) %>% slice_max(avg_log2FC, n = 10) %>%
    pull(gene) %>% unique()
  
  DoHeatmap(cns_stroma, features = top_cns, size = 3, group.by = "subtype") +
    theme(axis.text.y = element_text(face = "italic", size = 7)) +
    ggtitle("CNS Stroma Subtype Markers")
}

2e — Stroma GO Enrichment (Clean)

stroma_cns_genes <- de_stroma_clean %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>% pull(gene)
stroma_bm_genes <- de_stroma_clean %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>% pull(gene)

if (length(stroma_cns_genes) >= 5) {
  go_stroma_cns <- gost(stroma_cns_genes, organism = "mmusculus",
                         sources = "GO:BP", evcodes = TRUE, significant = TRUE)
  if (!is.null(go_stroma_cns$result) && nrow(go_stroma_cns$result) > 0) {
    print(gostplot(go_stroma_cns, capped = TRUE, interactive = FALSE) +
            ggtitle("Stroma CNS-Enriched (Clean) — GO BP"))
    go_stroma_cns$result %>% filter(source == "GO:BP") %>%
      arrange(p_value) %>% head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0), caption = "Top 20 GO:BP — CNS Stroma (Clean)")
  }
}

Top 20 GO:BP — CNS Stroma (Clean)
term_name p_value term_size intersection_size
localization 0 5309 638
system development 0 4155 527
multicellular organism development 0 4907 590
transport 0 4364 539
establishment of localization 0 4666 565
anatomical structure development 0 6280 698
developmental process 0 6873 740
small molecule metabolic process 0 1762 276
positive regulation of biological process 0 6447 684
positive regulation of cellular process 0 6095 652
cellular localization 0 3493 430
anatomical structure morphogenesis 0 2781 362
regulation of signal transduction 0 2940 371
regulation of cell communication 0 3560 425
regulation of signaling 0 3558 424
animal organ development 0 3272 397
cell surface receptor signaling pathway 0 2731 344
macromolecule localization 0 3054 371
regulation of cellular component organization 0 2537 324
cell death 0 2081 281
if (length(stroma_bm_genes) >= 5) {
  go_stroma_bm <- gost(stroma_bm_genes, organism = "mmusculus",
                         sources = "GO:BP", evcodes = TRUE, significant = TRUE)
  if (!is.null(go_stroma_bm$result) && nrow(go_stroma_bm$result) > 0) {
    print(gostplot(go_stroma_bm, capped = TRUE, interactive = FALSE) +
            ggtitle("Stroma BM-Enriched (Clean) — GO BP"))
    go_stroma_bm$result %>% filter(source == "GO:BP") %>%
      arrange(p_value) %>% head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0), caption = "Top 20 GO:BP — BM Stroma (Clean)")
  }
}

Top 20 GO:BP — BM Stroma (Clean)
term_name p_value term_size intersection_size
positive regulation of biological process 0 6447 898
positive regulation of cellular process 0 6095 853
regulation of response to stimulus 0 3939 635
biological regulation 0 13503 1355
regulation of biological process 0 13108 1330
regulation of cellular process 0 12715 1300
negative regulation of biological process 0 5759 771
negative regulation of cellular process 0 5540 747
immune system process 0 2848 487
positive regulation of metabolic process 0 3493 549
regulation of metabolic process 0 6679 828
positive regulation of macromolecule metabolic process 0 3174 508
regulation of multicellular organismal process 0 3080 498
regulation of macromolecule metabolic process 0 6153 771
regulation of primary metabolic process 0 5176 687
regulation of immune system process 0 1586 328
positive regulation of response to stimulus 0 2336 413
response to stimulus 0 10427 1094
intracellular signal transduction 0 2810 460
developmental process 0 6873 822

3. Myeloid Subcluster Annotation

3a — Annotation Evidence

myeloid_diag <- c(
  "P2ry12", "Tmem119", "Hexb", "Siglech", "Sall1", "Cx3cr1",
  "H2-Aa", "H2-Eb1", "H2-Ab1", "Cd74", "Ciita",
  "Mrc1", "Lyve1", "Cd163", "Pf4", "Folr2", "Cbr2", "Ms4a7",
  "Ly6c2", "Ccr2", "S100a4", "Sell", "Plac8",
  "Trem2", "Apoe", "Axl", "Cd9", "Spp1", "Gpnmb", "Lpl",
  "Tnf", "Il1b", "Nos2", "Ccl2", "Cd86", "Irf5",
  "Arg1", "Tgfb1", "Il10", "Cd274", "Retnla",
  "C1qa", "C1qb", "C1qc",
  "Mki67", "Top2a", "Birc5", "Aurka",
  "Ifit1", "Ifit2", "Mx1", "Isg15", "Oasl2",
  "Lgals9", "Cd47", "Nt5e", "Ptgs2",
  "Mertk", "Cd36", "Marco"
)
myeloid_diag_present <- unique(myeloid_diag[myeloid_diag %in% rownames(myeloid)])

DotPlot(myeloid, features = myeloid_diag_present,
        cols = c("lightgrey", "darkred"), dot.scale = 4) +
  RotatedAxis() +
  ggtitle("Myeloid Subcluster — Comprehensive Identity Panel") +
  theme(axis.text.x = element_text(face = "italic", size = 6),
        axis.text.y = element_text(size = 8))

3b — Tissue x Annotation Breakdown

myeloid@meta.data %>%
  count(seurat_clusters, original_annotation, Tissue) %>%
  pivot_wider(names_from = c(original_annotation, Tissue),
              values_from = n, values_fill = 0) %>%
  kable(caption = "Myeloid subcluster composition by annotation and tissue")
Myeloid subcluster composition by annotation and tissue
seurat_clusters Macrophages_CNS Microglia-like macrophages_CNS Macrophages_BM
0 1 1168 0
1 76 7 499
2 496 48 21
3 27 0 406
4 64 18 120
5 115 8 5
6 4 111 0
7 102 6 0
8 3 70 0
9 3 0 60
10 4 39 0
11 9 1 16

3c — Apply Myeloid Annotations

# ── MYELOID ANNOTATION MAP ──
# UPDATE based on dotplot in 3a. Key markers to look for:
#   P2ry12+/Tmem119+ → Homeostatic Microglia
#   Mrc1+/Lyve1+/Cd163+ → BAM/Perivascular
#   Ly6c2+/Ccr2+ → Monocyte-derived
#   Trem2+/Apoe+/Spp1+ → LAM/TREM2+
#   Mki67/Top2a → Proliferating
#   Ifit1/Mx1 → IFN-responsive

myeloid_labels <- c(
  "0"  = "Homeostatic Microglia (MHC-II high)",
  "1"  = "BM Macrophages",
  "2"  = "BM Macrophages (Ly6c2+)",
  "3"  = "BM Macrophages (activated)",
  "4"  = "Proliferating Myeloid",
  "5"  = "CNS Macrophages (Trem2+/LAM)",
  "6"  = "Homeostatic Microglia",
  "7"  = "Perivascular Macrophages (Lyve1+)",
  "8"  = "IFN-Responsive Myeloid",
  "9"  = "CNS Macrophages (monocyte-derived)",
  "10" = "Stress/Dissociation-associated",
  "11" = "BAM (border-associated)"
)

existing_clusters <- as.character(sort(unique(as.integer(
  as.character(myeloid$seurat_clusters)))))
cat("Existing myeloid clusters:", paste(existing_clusters, collapse = ", "), "\n")
## Existing myeloid clusters: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
for (cl in existing_clusters) {
  if (!(cl %in% names(myeloid_labels))) {
    myeloid_labels[cl] <- paste0("Cluster_", cl, " (unassigned)")
  }
}

myeloid$subtype <- dplyr::recode(as.character(myeloid$seurat_clusters),
  "0"  = "Homeostatic Microglia (MHC-II high)",
  "1"  = "BM Macrophages",
  "2"  = "BM Macrophages (Ly6c2+)",
  "3"  = "BM Macrophages (activated)",
  "4"  = "Proliferating Myeloid",
  "5"  = "CNS Macrophages (Trem2+/LAM)",
  "6"  = "Homeostatic Microglia",
  "7"  = "Perivascular Macrophages (Lyve1+)",
  "8"  = "IFN-Responsive Myeloid",
  "9"  = "CNS Macrophages (monocyte-derived)",
  "10" = "Stress/Dissociation-associated",
  "11" = "BAM (border-associated)",
  .default = "Unassigned"
)

cat("\nMyeloid annotation summary:\n")
## 
## Myeloid annotation summary:
myeloid@meta.data %>%
  count(seurat_clusters, subtype, Tissue) %>%
  pivot_wider(names_from = Tissue, values_from = n, values_fill = 0) %>%
  arrange(seurat_clusters) %>%
  kable()
seurat_clusters subtype CNS BM
0 Homeostatic Microglia (MHC-II high) 1169 0
1 BM Macrophages 83 499
2 BM Macrophages (Ly6c2+) 544 21
3 BM Macrophages (activated) 27 406
4 Proliferating Myeloid 82 120
5 CNS Macrophages (Trem2+/LAM) 123 5
6 Homeostatic Microglia 115 0
7 Perivascular Macrophages (Lyve1+) 108 0
8 IFN-Responsive Myeloid 73 0
9 CNS Macrophages (monocyte-derived) 3 60
10 Stress/Dissociation-associated 43 0
11 BAM (border-associated) 10 16
p1 <- DimPlot(myeloid, group.by = "subtype", label = TRUE, label.size = 3,
              repel = TRUE) +
  ggtitle("Myeloid — Annotated Subtypes") +
  theme(legend.text = element_text(size = 7))

p2 <- DimPlot(myeloid, group.by = "Tissue",
              cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
  ggtitle("By Tissue")

p1 + p2

DimPlot(myeloid, group.by = "subtype", split.by = "Tissue",
        label = TRUE, label.size = 2.5, repel = TRUE) +
  ggtitle("Myeloid Subtypes by Tissue") +
  theme(legend.text = element_text(size = 7))


4. Macrophage Polarisation Scoring

4a — M1/M2/LAM Module Scores

mac_modules <- list(
  M1_proinflammatory = c("Tnf", "Il1b", "Il6", "Nos2", "Ccl2", "Ccl3",
                          "Ccl4", "Cxcl10", "Cd86", "Cd80", "Irf5", "Stat1",
                          "Nfkb1", "Socs3"),
  M2_tolerogenic = c("Arg1", "Mrc1", "Cd163", "Tgfb1", "Il10", "Retnla",
                       "Chil3", "Cd274", "Vegfa", "Socs1", "Stat6",
                       "Ccl22", "Ccl17"),
  LAM_TREM2 = c("Trem2", "Tyrobp", "Apoe", "Axl", "Cd9", "Spp1",
                  "Lpl", "Cst7", "Gpnmb", "Itgax", "Clec7a", "Lgals3"),
  Antigen_presentation = c("H2-Eb1", "H2-Ab1", "H2-Aa", "Cd74", "Ciita",
                             "Cd80", "Cd86", "Tap1", "B2m"),
  Phagocytic = c("Mertk", "Axl", "Tyro3", "Cd36", "Megf10",
                   "Marco", "Scarb1", "Fcgr1"),
  Immunosuppressive = c("Lgals9", "Cd274", "Pdcd1lg2", "Tgfb1", "Tgfb2",
                          "Il10", "Arg1", "Nt5e", "Ptgs2", "Cd47")
)

mac_mods_filt <- lapply(mac_modules, function(g) g[g %in% rownames(myeloid)])
mac_mods_filt <- mac_mods_filt[sapply(mac_mods_filt, length) >= 3]
cat("Modules with >=3 genes:\n")
## Modules with >=3 genes:
for (nm in names(mac_mods_filt)) cat("  ", nm, ":", length(mac_mods_filt[[nm]]), "genes\n")
##    M1_proinflammatory : 14 genes
##    M2_tolerogenic : 13 genes
##    LAM_TREM2 : 12 genes
##    Antigen_presentation : 9 genes
##    Phagocytic : 8 genes
##    Immunosuppressive : 10 genes
myeloid <- AddModuleScore_UCell(myeloid, features = mac_mods_filt, name = "_pol")
pol_cols <- grep("_pol$", colnames(myeloid@meta.data), value = TRUE)

VlnPlot(myeloid, features = pol_cols,
        group.by = "subtype", pt.size = 0, ncol = 2) &
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7))

4b — M1 vs M2 Polarisation by Tissue

m1_col <- grep("M1_proinflammatory", pol_cols, value = TRUE)
m2_col <- grep("M2_tolerogenic", pol_cols, value = TRUE)

if (length(m1_col) > 0 && length(m2_col) > 0) {
  myeloid@meta.data %>%
    ggplot(aes(x = .data[[m2_col]], y = .data[[m1_col]], colour = Tissue)) +
    geom_point(size = 0.5, alpha = 0.4) +
    scale_colour_manual(values = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
    facet_wrap(~subtype, ncol = 4) +
    geom_hline(yintercept = 0, linetype = "dashed", colour = "grey60") +
    geom_vline(xintercept = 0, linetype = "dashed", colour = "grey60") +
    theme_minimal(base_size = 10) +
    labs(title = "M1 vs M2 Polarisation by Subtype and Tissue",
         x = "M2 / Tolerogenic Score", y = "M1 / Pro-inflammatory Score") +
    theme(strip.text = element_text(size = 7))
}

4c — LAM/TREM2 Programme by Tissue

lam_col <- grep("LAM_TREM2", pol_cols, value = TRUE)
if (length(lam_col) > 0) {
  myeloid@meta.data %>%
    ggplot(aes(x = subtype, y = .data[[lam_col]], fill = Tissue)) +
    geom_boxplot(outlier.size = 0.3, alpha = 0.7) +
    scale_fill_manual(values = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
    theme_minimal(base_size = 11) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
    labs(title = "LAM/TREM2 Programme Score by Subtype and Tissue",
         y = "LAM/TREM2 UCell Score")
}

4d — Immunosuppressive Score by Tissue

immunosupp_col <- grep("Immunosuppressive", pol_cols, value = TRUE)
if (length(immunosupp_col) > 0) {
  myeloid@meta.data %>%
    ggplot(aes(x = subtype, y = .data[[immunosupp_col]], fill = Tissue)) +
    geom_boxplot(outlier.size = 0.3, alpha = 0.7) +
    scale_fill_manual(values = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
    theme_minimal(base_size = 11) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
    labs(title = "Immunosuppressive Programme Score by Subtype and Tissue",
         y = "Immunosuppressive UCell Score")
}


5. Myeloid Tissue-Matched DE

5a — Identify Cross-Tissue Macrophage Populations

mac_cross <- myeloid@meta.data %>%
  count(subtype, Tissue) %>%
  pivot_wider(names_from = Tissue, values_from = n, values_fill = 0) %>%
  mutate(
    total = BM + CNS,
    shared = CNS >= 10 & BM >= 10,
    dominant = case_when(
      CNS > BM * 5 ~ "CNS-dominant",
      BM > CNS * 5 ~ "BM-dominant",
      TRUE ~ "Shared"
    )
  ) %>% arrange(desc(total))

cat("Myeloid subtypes — tissue distribution:\n")
## Myeloid subtypes — tissue distribution:
print(mac_cross)
## # A tibble: 12 × 6
##    subtype                                BM   CNS total shared dominant    
##    <chr>                               <int> <int> <int> <lgl>  <chr>       
##  1 Homeostatic Microglia (MHC-II high)     0  1169  1169 FALSE  CNS-dominant
##  2 BM Macrophages                        499    83   582 TRUE   BM-dominant 
##  3 BM Macrophages (Ly6c2+)                21   544   565 TRUE   CNS-dominant
##  4 BM Macrophages (activated)            406    27   433 TRUE   BM-dominant 
##  5 Proliferating Myeloid                 120    82   202 TRUE   Shared      
##  6 CNS Macrophages (Trem2+/LAM)            5   123   128 FALSE  CNS-dominant
##  7 Homeostatic Microglia                   0   115   115 FALSE  CNS-dominant
##  8 Perivascular Macrophages (Lyve1+)       0   108   108 FALSE  CNS-dominant
##  9 IFN-Responsive Myeloid                  0    73    73 FALSE  CNS-dominant
## 10 CNS Macrophages (monocyte-derived)     60     3    63 FALSE  BM-dominant 
## 11 Stress/Dissociation-associated          0    43    43 FALSE  CNS-dominant
## 12 BAM (border-associated)                16    10    26 TRUE   Shared

5b — Within-Subtype CNS vs BM DE

shared_mac <- mac_cross %>% filter(shared) %>% pull(subtype)
de_mac_subtype <- list()

if (length(shared_mac) > 0) {
  for (st in shared_mac) {
    cat("\n### ", st, " — CNS vs BM\n\n")
    sub <- subset(myeloid, subtype == st)
    n_cns <- sum(sub$Tissue == "CNS"); n_bm <- sum(sub$Tissue == "BM")
    cat("Cells: CNS =", n_cns, ", BM =", n_bm, "\n\n")
    if (min(n_cns, n_bm) < 10) { cat("Skipped: too few cells\n"); next }
    
    Idents(sub) <- "Tissue"
    de <- FindMarkers(sub, ident.1 = "CNS", ident.2 = "BM",
                      test.use = "wilcox", min.pct = 0.1, logfc.threshold = 0)
    de$gene <- rownames(de)
    de_mac_subtype[[st]] <- de
    
    cat("Significant:", sum(de$p_val_adj < 0.05),
        "| CNS-enriched:", sum(de$avg_log2FC > 0.5 & de$p_val_adj < 0.05),
        "| BM-enriched:", sum(de$avg_log2FC < -0.5 & de$p_val_adj < 0.05), "\n\n")
    
    cat("**Top CNS-enriched:**\n\n")
    de %>% filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
      arrange(desc(avg_log2FC)) %>% head(15) %>%
      select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
      kable(digits = c(NA, 2, 2, 2, 3)) %>% print()
    
    cat("\n**Top BM-enriched:**\n\n")
    de %>% filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
      arrange(avg_log2FC) %>% head(15) %>%
      select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
      kable(digits = c(NA, 2, 2, 2, 3)) %>% print()
  }
} else {
  cat("No myeloid subtypes span both tissues with >=10 cells each.\n")
}

BM Macrophages — CNS vs BM

Cells: CNS = 83 , BM = 499

Significant: 16 | CNS-enriched: 4 | BM-enriched: 11

Top CNS-enriched:

gene avg_log2FC pct.1 pct.2 p_val_adj
Pelo Pelo 2.57 0.14 0.02 0.008
Mad2l1bp Mad2l1bp 2.15 0.13 0.02 0.033
Capg Capg 1.00 0.84 0.58 0.000
Sh3bgrl3 Sh3bgrl3 0.68 0.93 0.89 0.037

Top BM-enriched:

gene avg_log2FC pct.1 pct.2 p_val_adj
Sdc3 Sdc3 -1.50 0.41 0.67 0.004
Lamp1 Lamp1 -1.45 0.66 0.85 0.000
mt-Nd5 mt-Nd5 -1.27 0.32 0.63 0.009
Vcam1 Vcam1 -1.18 0.65 0.91 0.000
Fcna Fcna -1.07 0.55 0.84 0.000
mt-Co3 mt-Co3 -1.00 0.61 0.89 0.000
Slc40a1 Slc40a1 -0.97 0.75 0.86 0.016
mt-Atp6 mt-Atp6 -0.87 0.60 0.85 0.009
mt-Co2 mt-Co2 -0.79 0.65 0.89 0.002
mt-Co1 mt-Co1 -0.79 0.74 0.91 0.005
Selenop Selenop -0.75 1.00 0.99 0.000

BM Macrophages (Ly6c2+) — CNS vs BM

Cells: CNS = 544 , BM = 21

Significant: 44 | CNS-enriched: 3 | BM-enriched: 41

Top CNS-enriched:

gene avg_log2FC pct.1 pct.2 p_val_adj
Mt2 Mt2 4.34 0.70 0.10 0.011
Cdkn1a Cdkn1a 3.61 0.75 0.19 0.008
Mt1 Mt1 2.35 0.98 0.86 0.001

Top BM-enriched:

gene avg_log2FC pct.1 pct.2 p_val_adj
Serpina3g Serpina3g -5.55 0.00 0.14 0.000
Cd5l Cd5l -5.10 0.04 0.43 0.000
Napsa Napsa -5.01 0.00 0.14 0.000
Ugcg Ugcg -4.57 0.01 0.14 0.049
Dhcr7 Dhcr7 -4.52 0.01 0.19 0.000
Cdc14a Cdc14a -4.12 0.01 0.14 0.001
Tmem51 Tmem51 -4.06 0.02 0.24 0.000
Nedd4 Nedd4 -4.02 0.01 0.19 0.000
Crybg1 Crybg1 -3.99 0.04 0.29 0.001
Dcaf1 Dcaf1 -3.99 0.02 0.19 0.036
Hspa13 Hspa13 -3.95 0.02 0.19 0.047
Rptor Rptor -3.84 0.03 0.24 0.011
Itgal Itgal -3.74 0.01 0.19 0.000
Siae Siae -3.71 0.02 0.24 0.000
Fgr Fgr -3.68 0.01 0.19 0.001

BM Macrophages (activated) — CNS vs BM

Cells: CNS = 27 , BM = 406

Significant: 5 | CNS-enriched: 5 | BM-enriched: 0

Top CNS-enriched:

gene avg_log2FC pct.1 pct.2 p_val_adj
Dtx1 Dtx1 4.51 0.11 0.00 0.016
Zfp455 Zfp455 4.42 0.11 0.00 0.000
Zfp959 Zfp959 4.30 0.15 0.01 0.044
Seh1l Seh1l 3.04 0.30 0.05 0.004
Prmt3 Prmt3 2.18 0.30 0.05 0.031

Top BM-enriched:

gene avg_log2FC pct.1 pct.2 p_val_adj

Proliferating Myeloid — CNS vs BM

Cells: CNS = 82 , BM = 120

Significant: 114 | CNS-enriched: 41 | BM-enriched: 73

Top CNS-enriched:

gene avg_log2FC pct.1 pct.2 p_val_adj
Lyve1 Lyve1 8.22 0.20 0.00 0.014
Igfbp4 Igfbp4 8.03 0.34 0.00 0.000
Fcrls Fcrls 8.02 0.26 0.00 0.000
F13a1 F13a1 6.94 0.58 0.03 0.000
Clec5a Clec5a 6.08 0.22 0.00 0.002
Cbr2 Cbr2 4.84 0.77 0.09 0.000
Cfh Cfh 4.67 0.24 0.01 0.002
Grap Grap 4.12 0.33 0.03 0.000
Ncf1 Ncf1 3.90 0.39 0.03 0.000
Akr1b8 Akr1b8 3.84 0.34 0.03 0.000
Mef2c Mef2c 3.33 0.33 0.04 0.001
Smarca2 Smarca2 3.25 0.27 0.03 0.030
Ctla2b Ctla2b 3.18 0.29 0.03 0.006
Ifi27l2a Ifi27l2a 3.16 0.46 0.13 0.001
Klf2 Klf2 2.97 0.29 0.05 0.045

Top BM-enriched:

gene avg_log2FC pct.1 pct.2 p_val_adj
Pilrb2 Pilrb2 -6.03 0.00 0.24 0.047
Mrap Mrap -5.40 0.01 0.32 0.002
Cd5l Cd5l -4.79 0.05 0.73 0.000
Itgad Itgad -4.31 0.01 0.28 0.020
Ear2 Ear2 -3.94 0.01 0.51 0.000
Ccr3 Ccr3 -3.17 0.02 0.35 0.002
Il18bp Il18bp -3.14 0.17 0.67 0.000
Fabp4 Fabp4 -2.83 0.29 0.89 0.000
Cd300a Cd300a -2.76 0.15 0.61 0.000
Axl Axl -2.74 0.16 0.68 0.000
Sdc3 Sdc3 -2.72 0.20 0.68 0.000
Clec7a Clec7a -2.69 0.18 0.73 0.000
Actn1 Actn1 -2.59 0.17 0.52 0.001
Vcam1 Vcam1 -2.56 0.23 0.94 0.000
Slc40a1 Slc40a1 -2.54 0.40 0.86 0.000

BAM (border-associated) — CNS vs BM

Cells: CNS = 10 , BM = 16

Significant: 0 | CNS-enriched: 0 | BM-enriched: 0

Top CNS-enriched:

gene avg_log2FC pct.1 pct.2 p_val_adj

Top BM-enriched:

gene avg_log2FC pct.1 pct.2 p_val_adj

5c — All Macrophages: CNS vs BM (Excluding Microglia)

mac_only <- subset(myeloid, original_annotation == "Macrophages")
cat("Macrophages only:", ncol(mac_only), "cells\n")
## Macrophages only: 2031 cells
print(table(mac_only$Tissue))
## 
##   BM  CNS 
## 1127  904
Idents(mac_only) <- "Tissue"
de_mac_tissue <- FindMarkers(mac_only, ident.1 = "CNS", ident.2 = "BM",
                              test.use = "wilcox", min.pct = 0.1, logfc.threshold = 0)
de_mac_tissue$gene <- rownames(de_mac_tissue)

cat("\nMacrophage CNS vs BM — significant DE:", sum(de_mac_tissue$p_val_adj < 0.05), "\n")
## 
## Macrophage CNS vs BM — significant DE: 1300
cat("  CNS-enriched:", sum(de_mac_tissue$avg_log2FC > 0.5 & de_mac_tissue$p_val_adj < 0.05), "\n")
##   CNS-enriched: 738
cat("  BM-enriched:", sum(de_mac_tissue$avg_log2FC < -0.5 & de_mac_tissue$p_val_adj < 0.05), "\n")
##   BM-enriched: 419
mac_labels <- c("Trem2", "Apoe", "Axl", "Cd9", "Spp1", "Gpnmb",
                 "Mrc1", "Lyve1", "Cd163", "Arg1", "Lgals9",
                 "H2-Eb1", "H2-Ab1", "Cd74", "Ciita",
                 "Tnf", "Il1b", "Nos2", "Ccl2",
                 "C1qa", "Tgfb1", "Igf1",
                 "Lyz2", "Ly6c2", "Ccr2", "S100a4",
                 "P2ry12", "Tmem119", "Hexb")

de_mac_plot <- de_mac_tissue %>%
  mutate(
    sig = case_when(
      p_val_adj < 0.05 & avg_log2FC > 0.5  ~ "CNS-enriched",
      p_val_adj < 0.05 & avg_log2FC < -0.5 ~ "BM-enriched",
      TRUE ~ "NS"
    ),
    label = ifelse(gene %in% mac_labels |
                     (p_val_adj < 1e-20 & abs(avg_log2FC) > 2), gene, "")
  )

ggplot(de_mac_plot, aes(x = avg_log2FC, y = -log10(p_val_adj),
                         colour = sig, label = label)) +
  geom_point(size = 0.8, alpha = 0.5) +
  geom_text_repel(size = 3, max.overlaps = 30,
                   fontface = "italic", segment.size = 0.2) +
  scale_colour_manual(values = c("CNS-enriched" = "#B2182B",
                                  "BM-enriched" = "#2166AC", "NS" = "grey80")) +
  geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", colour = "grey60") +
  theme_minimal(base_size = 12) +
  labs(title = "Macrophages: CNS vs BM", x = "log2FC (+ = CNS)", y = "-log10(padj)")

5d — Macrophage GO Enrichment

mac_cns_genes <- de_mac_tissue %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>% pull(gene)
mac_bm_genes <- de_mac_tissue %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>% pull(gene)

if (length(mac_cns_genes) >= 5) {
  go_mac_cns <- gost(mac_cns_genes, organism = "mmusculus",
                      sources = "GO:BP", evcodes = TRUE, significant = TRUE)
  if (!is.null(go_mac_cns$result) && nrow(go_mac_cns$result) > 0) {
    print(gostplot(go_mac_cns, capped = TRUE, interactive = FALSE) +
            ggtitle("Macrophage CNS-Enriched — GO BP"))
    go_mac_cns$result %>% filter(source == "GO:BP") %>%
      arrange(p_value) %>% head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0), caption = "Top 20 GO:BP — CNS Macrophages")
  }
}

Top 20 GO:BP — CNS Macrophages
term_name p_value term_size intersection_size
vesicle-mediated transport 0 1524 125
response to stress 0 3917 209
transport 0 4364 218
establishment of localization 0 4666 226
endocytosis 0 686 68
cellular localization 0 3493 184
positive regulation of biological process 0 6447 281
establishment of localization in cell 0 2036 128
negative regulation of biological process 0 5759 258
biological regulation 0 13503 478
positive regulation of cellular process 0 6095 267
regulation of biological process 0 13108 466
localization 0 5309 241
regulation of cellular process 0 12715 452
negative regulation of cellular process 0 5540 245
cellular process 0 21471 651
intracellular transport 0 1337 92
endosomal transport 0 269 37
immune system process 0 2848 149
import into cell 0 900 70
if (length(mac_bm_genes) >= 5) {
  go_mac_bm <- gost(mac_bm_genes, organism = "mmusculus",
                      sources = "GO:BP", evcodes = TRUE, significant = TRUE)
  if (!is.null(go_mac_bm$result) && nrow(go_mac_bm$result) > 0) {
    print(gostplot(go_mac_bm, capped = TRUE, interactive = FALSE) +
            ggtitle("Macrophage BM-Enriched — GO BP"))
    go_mac_bm$result %>% filter(source == "GO:BP") %>%
      arrange(p_value) %>% head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0), caption = "Top 20 GO:BP — BM Macrophages")
  }
}

Top 20 GO:BP — BM Macrophages
term_name p_value term_size intersection_size
localization 0 5309 196
transport 0 4364 171
establishment of localization 0 4666 174
import into cell 0 900 69
immune system process 0 2848 119
endocytosis 0 686 53
developmental process 0 6873 194
anatomical structure development 0 6280 183
response to stress 0 3917 136
cell migration 0 1539 78
cell motility 0 1804 83
transmembrane transport 0 1525 75
regulation of immune system process 0 1586 76
response to stimulus 0 10427 246
phagocytosis 0 221 29
regulation of cell migration 0 978 57
homeostatic process 0 1824 80
monoatomic ion transport 0 1235 64
positive regulation of biological process 0 6447 176
regulation of cell motility 0 1034 58

6. CNS-Specific Myeloid Comparisons

6a — CNS Myeloid Subtypes: All vs All

cns_myeloid <- subset(myeloid, Tissue == "CNS")
cns_mac_subtypes <- names(which(table(cns_myeloid$subtype) >= 10))
cat("CNS myeloid subtypes with >=10 cells:\n")
## CNS myeloid subtypes with >=10 cells:
for (st in cns_mac_subtypes) cat("  ", st, ":", sum(cns_myeloid$subtype == st), "\n")
##    BAM (border-associated) : 10 
##    BM Macrophages : 83 
##    BM Macrophages (activated) : 27 
##    BM Macrophages (Ly6c2+) : 544 
##    CNS Macrophages (Trem2+/LAM) : 123 
##    Homeostatic Microglia : 115 
##    Homeostatic Microglia (MHC-II high) : 1169 
##    IFN-Responsive Myeloid : 73 
##    Perivascular Macrophages (Lyve1+) : 108 
##    Proliferating Myeloid : 82 
##    Stress/Dissociation-associated : 43
Idents(cns_myeloid) <- "subtype"

if (length(cns_mac_subtypes) >= 2) {
  de_cns_myeloid <- FindAllMarkers(cns_myeloid, only.pos = TRUE,
                                    min.pct = 0.15, logfc.threshold = 0.5,
                                    verbose = FALSE)
  cat("\nDE genes distinguishing CNS myeloid subtypes:", nrow(de_cns_myeloid), "\n\n")
  
  de_cns_myeloid %>%
    group_by(cluster) %>% slice_max(avg_log2FC, n = 10) %>%
    select(cluster, gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
    kable(digits = c(NA, NA, 2, 2, 2, 3),
          caption = "Top 10 markers per CNS myeloid subtype")
}
## 
## DE genes distinguishing CNS myeloid subtypes: 11966
Top 10 markers per CNS myeloid subtype
cluster gene avg_log2FC pct.1 pct.2 p_val_adj
Proliferating Myeloid Mxd3 10.15 0.17 0.00 0.000
Proliferating Myeloid Fam83d 8.84 0.18 0.00 0.000
Proliferating Myeloid Troap 8.68 0.23 0.00 0.000
Proliferating Myeloid Nusap1 8.16 0.57 0.00 0.000
Proliferating Myeloid Aurka 7.78 0.46 0.00 0.000
Proliferating Myeloid Pimreg 7.68 0.30 0.00 0.000
Proliferating Myeloid Kif20a 7.66 0.44 0.00 0.000
Proliferating Myeloid Foxm1 7.64 0.16 0.00 0.000
Proliferating Myeloid Prc1 7.60 0.63 0.00 0.000
Proliferating Myeloid Birc5 7.57 0.80 0.01 0.000
Homeostatic Microglia (MHC-II high) Gm41071 3.49 0.15 0.02 0.000
Homeostatic Microglia (MHC-II high) Upk1b 3.43 0.22 0.02 0.000
Homeostatic Microglia (MHC-II high) Slc2a5 3.32 0.56 0.07 0.000
Homeostatic Microglia (MHC-II high) Thrsp 3.31 0.21 0.02 0.000
Homeostatic Microglia (MHC-II high) Tmem100 3.18 0.31 0.03 0.000
Homeostatic Microglia (MHC-II high) Ecscr 3.13 0.58 0.07 0.000
Homeostatic Microglia (MHC-II high) Gm2237 3.13 0.16 0.02 0.000
Homeostatic Microglia (MHC-II high) P2ry12 3.06 1.00 0.34 0.000
Homeostatic Microglia (MHC-II high) Jam2 3.05 0.28 0.04 0.000
Homeostatic Microglia (MHC-II high) Tmem119 3.02 0.95 0.17 0.000
BM Macrophages (Ly6c2+) Akr1b8 5.23 0.45 0.03 0.000
BM Macrophages (Ly6c2+) Srxn1 4.68 0.33 0.03 0.000
BM Macrophages (Ly6c2+) Prdx1 4.19 1.00 0.61 0.000
BM Macrophages (Ly6c2+) Clec4d 4.06 0.52 0.04 0.000
BM Macrophages (Ly6c2+) Adamtsl5 4.05 0.16 0.01 0.000
BM Macrophages (Ly6c2+) Ccl8 3.99 0.69 0.07 0.000
BM Macrophages (Ly6c2+) Cd209g 3.98 0.20 0.02 0.000
BM Macrophages (Ly6c2+) Marco 3.98 0.25 0.02 0.000
BM Macrophages (Ly6c2+) Mt2 3.97 0.70 0.07 0.000
BM Macrophages (Ly6c2+) Fxyd2 3.93 0.28 0.03 0.000
BM Macrophages Fabp4 5.02 0.75 0.09 0.000
BM Macrophages Gpnmb 4.82 0.57 0.06 0.000
BM Macrophages Acp5 4.80 0.59 0.03 0.000
BM Macrophages Ear2 4.53 0.25 0.01 0.000
BM Macrophages Atp6v0d2 4.45 0.75 0.07 0.000
BM Macrophages Mrap 4.42 0.16 0.01 0.000
BM Macrophages Nr1h3 4.29 0.59 0.06 0.000
BM Macrophages Anpep 4.08 0.36 0.04 0.000
BM Macrophages Cyfip2 4.07 0.23 0.02 0.000
BM Macrophages Mmp12 3.99 0.29 0.02 0.000
CNS Macrophages (Trem2+/LAM) Gm56975 7.00 0.18 0.00 0.000
CNS Macrophages (Trem2+/LAM) Pde7b 6.92 0.33 0.01 0.000
CNS Macrophages (Trem2+/LAM) Gm56990 6.45 0.21 0.00 0.000
CNS Macrophages (Trem2+/LAM) Adam33 6.12 0.33 0.01 0.000
CNS Macrophages (Trem2+/LAM) Klra2 5.82 0.24 0.01 0.000
CNS Macrophages (Trem2+/LAM) F630028O10Rik 5.58 0.76 0.04 0.000
CNS Macrophages (Trem2+/LAM) Ptprk 5.05 0.18 0.00 0.000
CNS Macrophages (Trem2+/LAM) H2-Eb1 4.99 0.20 0.02 0.000
CNS Macrophages (Trem2+/LAM) H2-Aa 4.94 0.24 0.03 0.000
CNS Macrophages (Trem2+/LAM) Trps1 4.78 0.59 0.05 0.000
IFN-Responsive Myeloid Gm32006 4.65 0.20 0.02 0.000
IFN-Responsive Myeloid Zfpm1 4.13 0.16 0.02 0.000
IFN-Responsive Myeloid Gm12610 4.07 0.15 0.03 0.000
IFN-Responsive Myeloid Zfp827 3.89 0.18 0.04 0.000
IFN-Responsive Myeloid Gm49662 3.72 0.18 0.03 0.000
IFN-Responsive Myeloid Kyat3 3.63 0.15 0.02 0.000
IFN-Responsive Myeloid Cables1 3.59 0.62 0.18 0.000
IFN-Responsive Myeloid Ksr1 3.55 0.18 0.04 0.001
IFN-Responsive Myeloid Setbp1 3.54 0.22 0.06 0.000
IFN-Responsive Myeloid Taco1 3.53 0.33 0.08 0.000
BAM (border-associated) Krtdap 11.32 0.20 0.00 0.000
BAM (border-associated) Cxcl1 11.28 0.20 0.00 0.000
BAM (border-associated) Gm17749 10.02 0.30 0.00 0.000
BAM (border-associated) Cxcl2 8.90 0.60 0.01 0.000
BAM (border-associated) Dusp9 8.29 0.20 0.00 0.000
BAM (border-associated) Id3 7.98 0.20 0.01 0.000
BAM (border-associated) Gadd45b 7.97 0.80 0.04 0.000
BAM (border-associated) Bex2 7.95 0.20 0.00 0.000
BAM (border-associated) Dusp8 7.85 0.30 0.00 0.000
BAM (border-associated) Gm37800 7.47 0.20 0.00 0.000
Stress/Dissociation-associated Cst7 4.51 0.49 0.05 0.000
Stress/Dissociation-associated Htra3 3.18 0.19 0.05 0.606
Stress/Dissociation-associated Slc2a6 2.98 0.16 0.03 0.016
Stress/Dissociation-associated Fasn 2.88 0.19 0.03 0.000
Stress/Dissociation-associated Chst1 2.72 0.21 0.05 0.147
Stress/Dissociation-associated Fam174c 2.37 0.16 0.05 1.000
Stress/Dissociation-associated Srebf2 2.37 0.19 0.06 1.000
Stress/Dissociation-associated Cd52 2.10 0.93 0.58 0.000
Stress/Dissociation-associated Hcst 2.07 0.28 0.11 1.000
Stress/Dissociation-associated Gm15564 1.98 0.44 0.15 0.002
Homeostatic Microglia Ifi206 4.36 0.30 0.02 0.000
Homeostatic Microglia Ifit3b 4.11 0.34 0.01 0.000
Homeostatic Microglia Rsad2 3.97 0.30 0.02 0.000
Homeostatic Microglia Ifit2 3.89 0.57 0.05 0.000
Homeostatic Microglia Ifit1 3.71 0.31 0.02 0.000
Homeostatic Microglia Mx1 3.71 0.51 0.05 0.000
Homeostatic Microglia A330040F15Rik 3.67 0.20 0.03 0.000
Homeostatic Microglia Usp18 3.65 0.44 0.05 0.000
Homeostatic Microglia Ifit3 3.56 0.65 0.07 0.000
Homeostatic Microglia Zbp1 3.54 0.44 0.05 0.000
Perivascular Macrophages (Lyve1+) Ptgds 4.57 0.34 0.03 0.000
Perivascular Macrophages (Lyve1+) C4b 4.40 0.48 0.06 0.000
Perivascular Macrophages (Lyve1+) C2 4.15 0.18 0.02 0.000
Perivascular Macrophages (Lyve1+) Gpx3 4.03 0.65 0.10 0.000
Perivascular Macrophages (Lyve1+) Cd209b 4.03 0.16 0.01 0.000
Perivascular Macrophages (Lyve1+) Igfbp4 3.70 0.79 0.21 0.000
Perivascular Macrophages (Lyve1+) Cd163 3.51 0.81 0.14 0.000
Perivascular Macrophages (Lyve1+) Timd4 3.33 0.36 0.04 0.000
Perivascular Macrophages (Lyve1+) Siglec1 3.32 0.56 0.09 0.000
Perivascular Macrophages (Lyve1+) Mt3 3.29 0.16 0.02 0.000
BM Macrophages (activated) Paqr9 8.01 0.18 0.00 0.000
BM Macrophages (activated) Ubd 7.71 0.26 0.00 0.000
BM Macrophages (activated) Pdzk1 7.53 0.30 0.00 0.000
BM Macrophages (activated) Cd5l 6.96 1.00 0.03 0.000
BM Macrophages (activated) Ccr3 6.89 0.59 0.01 0.000
BM Macrophages (activated) Kcna2 6.79 0.26 0.00 0.000
BM Macrophages (activated) Itgad 6.46 0.48 0.01 0.000
BM Macrophages (activated) Ccdc148 6.30 0.33 0.01 0.000
BM Macrophages (activated) Ear2 6.28 0.67 0.01 0.000
BM Macrophages (activated) Mmp27 6.14 0.18 0.00 0.000
CNS Macrophages (monocyte-derived) 2310081O03Rik 12.04 0.33 0.00 0.000
CNS Macrophages (monocyte-derived) Fbln5 12.04 0.33 0.00 0.000
CNS Macrophages (monocyte-derived) Mamdc2 11.30 0.33 0.00 0.000
CNS Macrophages (monocyte-derived) Pon1 11.29 0.33 0.00 0.000
CNS Macrophages (monocyte-derived) Gm9869 11.29 0.33 0.00 0.000
CNS Macrophages (monocyte-derived) Coch 11.29 0.33 0.00 0.000
CNS Macrophages (monocyte-derived) Gm52182 11.29 0.33 0.00 0.000
CNS Macrophages (monocyte-derived) Aqp1 11.02 0.33 0.00 0.000
CNS Macrophages (monocyte-derived) Cspg4 11.02 0.33 0.00 0.000
CNS Macrophages (monocyte-derived) Gm8098 11.02 0.33 0.00 0.000
if (exists("de_cns_myeloid") && nrow(de_cns_myeloid) > 0) {
  top_cns_mac <- de_cns_myeloid %>%
    group_by(cluster) %>% slice_max(avg_log2FC, n = 8) %>%
    pull(gene) %>% unique()
  
  DoHeatmap(cns_myeloid, features = top_cns_mac, size = 3, group.by = "subtype") +
    theme(axis.text.y = element_text(face = "italic", size = 6)) +
    ggtitle("CNS Myeloid Subtype Markers")
}

6b — CNS Macrophages vs Microglia-like (Refined)

cns_mac_mg <- subset(myeloid, Tissue == "CNS" &
                       original_annotation %in% c("Macrophages", "Microglia-like macrophages"))
cat("CNS comparison:\n")
## CNS comparison:
print(table(cns_mac_mg$original_annotation))
## 
##                Macrophages Microglia-like macrophages 
##                        904                       1476
Idents(cns_mac_mg) <- "original_annotation"
de_mac_vs_mg <- FindMarkers(cns_mac_mg,
                              ident.1 = "Microglia-like macrophages",
                              ident.2 = "Macrophages",
                              test.use = "wilcox", min.pct = 0.1, logfc.threshold = 0)
de_mac_vs_mg$gene <- rownames(de_mac_vs_mg)

cat("\nMicroglia-like vs CNS Macrophages:")
## 
## Microglia-like vs CNS Macrophages:
cat("\n  Significant:", sum(de_mac_vs_mg$p_val_adj < 0.05))
## 
##   Significant: 4725
cat("\n  Microglia-enriched:", sum(de_mac_vs_mg$avg_log2FC > 0.5 & de_mac_vs_mg$p_val_adj < 0.05))
## 
##   Microglia-enriched: 3197
cat("\n  Macrophage-enriched:", sum(de_mac_vs_mg$avg_log2FC < -0.5 & de_mac_vs_mg$p_val_adj < 0.05), "\n")
## 
##   Macrophage-enriched: 1363

6c — Polarisation Comparison: CNS Mac vs Microglia

pol_cols_present <- pol_cols[pol_cols %in% colnames(cns_mac_mg@meta.data)]
if (length(pol_cols_present) > 0) {
  VlnPlot(cns_mac_mg, features = pol_cols_present,
          group.by = "original_annotation", pt.size = 0, ncol = 2,
          cols = c("Macrophages" = "#FF7F00",
                    "Microglia-like macrophages" = "#984EA3")) &
    theme(axis.text.x = element_text(angle = 30, hjust = 1, size = 9))
}


7. Summary & Composition

7a — Stroma Composition

stroma_clean@meta.data %>%
  count(subtype, Tissue) %>%
  ggplot(aes(x = reorder(subtype, -n), y = n, fill = Tissue)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
  theme_minimal(base_size = 11) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title = "Stroma Subtype Composition by Tissue", x = "", y = "Cell Count")

7b — Myeloid Composition

myeloid@meta.data %>%
  count(subtype, Tissue) %>%
  ggplot(aes(x = reorder(subtype, -n), y = n, fill = Tissue)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
  theme_minimal(base_size = 11) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title = "Myeloid Subtype Composition by Tissue", x = "", y = "Cell Count")

7c — Tissue Proportions

p1 <- stroma_clean@meta.data %>%
  count(Tissue, subtype) %>% group_by(Tissue) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ggplot(aes(x = Tissue, y = pct, fill = subtype)) +
  geom_col() + theme_minimal(base_size = 11) +
  labs(title = "Stroma: Subtype Proportions", y = "% of Tissue Stroma", fill = "Subtype") +
  theme(legend.text = element_text(size = 7))

p2 <- myeloid@meta.data %>%
  count(Tissue, subtype) %>% group_by(Tissue) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ggplot(aes(x = Tissue, y = pct, fill = subtype)) +
  geom_col() + theme_minimal(base_size = 11) +
  labs(title = "Myeloid: Subtype Proportions", y = "% of Tissue Myeloid", fill = "Subtype") +
  theme(legend.text = element_text(size = 7))

p1 + p2


8. Export

write.csv(de_stroma_clean,
          file.path(cache_dir, "13c_DE_stroma_CNS_vs_BM_clean.csv"), row.names = FALSE)
write.csv(de_mac_tissue,
          file.path(cache_dir, "13c_DE_macrophages_CNS_vs_BM.csv"), row.names = FALSE)
write.csv(de_mac_vs_mg,
          file.path(cache_dir, "13c_DE_CNS_mac_vs_microglia.csv"), row.names = FALSE)

for (nm in names(de_stroma_subtype)) {
  fn <- gsub("[^[:alnum:]]", "_", nm)
  write.csv(de_stroma_subtype[[nm]],
            file.path(cache_dir, paste0("13c_DE_stroma_", fn, "_CNS_vs_BM.csv")), row.names = FALSE)
}
for (nm in names(de_mac_subtype)) {
  fn <- gsub("[^[:alnum:]]", "_", nm)
  write.csv(de_mac_subtype[[nm]],
            file.path(cache_dir, paste0("13c_DE_myeloid_", fn, "_CNS_vs_BM.csv")), row.names = FALSE)
}

if (exists("de_cns_subtypes"))
  write.csv(de_cns_subtypes, file.path(cache_dir, "13c_CNS_stroma_subtype_markers.csv"), row.names = FALSE)
if (exists("de_cns_myeloid"))
  write.csv(de_cns_myeloid, file.path(cache_dir, "13c_CNS_myeloid_subtype_markers.csv"), row.names = FALSE)

# Helper to flatten list columns for CSV export
flatten_gost <- function(df) {
  df %>% mutate(across(where(is.list), ~sapply(., paste, collapse = ",")))
}

if (exists("go_stroma_cns") && !is.null(go_stroma_cns$result))
  write.csv(flatten_gost(go_stroma_cns$result),
            file.path(cache_dir, "13c_GO_stroma_CNS_clean.csv"), row.names = FALSE)
if (exists("go_stroma_bm") && !is.null(go_stroma_bm$result))
  write.csv(flatten_gost(go_stroma_bm$result),
            file.path(cache_dir, "13c_GO_stroma_BM_clean.csv"), row.names = FALSE)
if (exists("go_mac_cns") && !is.null(go_mac_cns$result))
  write.csv(flatten_gost(go_mac_cns$result),
            file.path(cache_dir, "13c_GO_macrophage_CNS.csv"), row.names = FALSE)
if (exists("go_mac_bm") && !is.null(go_mac_bm$result))
  write.csv(flatten_gost(go_mac_bm$result),
            file.path(cache_dir, "13c_GO_macrophage_BM.csv"), row.names = FALSE)

qs_save(stroma_clean, file.path(cache_dir, "13c_stroma_annotated_clean.qs"))
qs_save(myeloid, file.path(cache_dir, "13c_myeloid_annotated.qs"))

cat("All results saved to:", cache_dir, "\n")
## All results saved to: /exports/eddie/scratch/aduguid3/harmony_clustering

Session Information

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Rocky Linux 9.5 (Blue Onyx)
## 
## Matrix products: default
## BLAS/LAPACK: /opt/intel/oneapi/mkl/2024.0/lib/libmkl_gf_lp64.so.2;  LAPACK version 3.10.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/London
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] RColorBrewer_1.1-3    ggrepel_0.9.6         gprofiler2_0.2.4     
##  [4] UCell_2.10.1          knitr_1.51            circlize_0.4.17      
##  [7] ComplexHeatmap_2.22.0 patchwork_1.3.2       ggplot2_4.0.2        
## [10] tidyr_1.3.1           dplyr_1.1.4           qs2_0.1.7            
## [13] Seurat_5.4.0          SeuratObject_5.3.0    sp_2.1-4             
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.23            splines_4.4.1              
##   [3] later_1.4.7                 bitops_1.0-9               
##   [5] tibble_3.3.1                polyclip_1.10-7            
##   [7] fastDummies_1.7.5           lifecycle_1.0.5            
##   [9] doParallel_1.0.17           globals_0.16.3             
##  [11] lattice_0.22-6              MASS_7.3-60.2              
##  [13] magrittr_2.0.3              limma_3.62.2               
##  [15] plotly_4.12.0               sass_0.4.9                 
##  [17] rmarkdown_2.30              jquerylib_0.1.4            
##  [19] yaml_2.3.12                 httpuv_1.6.16              
##  [21] otel_0.2.0                  sctransform_0.4.3          
##  [23] spam_2.11-3                 spatstat.sparse_3.1-0      
##  [25] reticulate_1.45.0           cowplot_1.2.0              
##  [27] pbapply_1.7-4               abind_1.4-5                
##  [29] zlibbioc_1.52.0             Rtsne_0.17                 
##  [31] GenomicRanges_1.58.0        presto_1.0.0               
##  [33] purrr_1.0.2                 RCurl_1.98-1.17            
##  [35] BiocGenerics_0.52.0         GenomeInfoDbData_1.2.13    
##  [37] IRanges_2.40.1              S4Vectors_0.44.0           
##  [39] irlba_2.3.7                 listenv_0.9.1              
##  [41] spatstat.utils_3.2-1        goftest_1.2-3              
##  [43] RSpectra_0.16-2             spatstat.random_3.4-4      
##  [45] fitdistrplus_1.2-6          parallelly_1.37.1          
##  [47] codetools_0.2-20            DelayedArray_0.32.0        
##  [49] tidyselect_1.2.1            shape_1.4.6.1              
##  [51] UCSC.utils_1.2.0            farver_2.1.2               
##  [53] matrixStats_1.5.0           stats4_4.4.1               
##  [55] spatstat.explore_3.7-0      jsonlite_2.0.0             
##  [57] GetoptLong_1.1.0            BiocNeighbors_2.0.1        
##  [59] progressr_0.18.0            ggridges_0.5.6             
##  [61] survival_3.6-4              iterators_1.0.14           
##  [63] foreach_1.5.2               tools_4.4.1                
##  [65] ica_1.0-3                   Rcpp_1.0.12                
##  [67] glue_1.8.0                  gridExtra_2.3              
##  [69] SparseArray_1.6.2           xfun_0.56                  
##  [71] MatrixGenerics_1.18.1       GenomeInfoDb_1.42.3        
##  [73] withr_3.0.2                 fastmap_1.2.0              
##  [75] fansi_1.0.6                 digest_0.6.35              
##  [77] R6_2.6.1                    mime_0.12                  
##  [79] colorspace_2.1-0            scattermore_1.2            
##  [81] tensor_1.5.1                dichromat_2.0-0.1          
##  [83] spatstat.data_3.1-9         utf8_1.2.4                 
##  [85] generics_0.1.3              data.table_1.15.4          
##  [87] httr_1.4.7                  htmlwidgets_1.6.4          
##  [89] S4Arrays_1.6.0              uwot_0.2.4                 
##  [91] pkgconfig_2.0.3             gtable_0.3.6               
##  [93] lmtest_0.9-40               S7_0.2.1                   
##  [95] SingleCellExperiment_1.28.1 XVector_0.46.0             
##  [97] htmltools_0.5.8.1           dotCall64_1.2              
##  [99] clue_0.3-67                 scales_1.4.0               
## [101] Biobase_2.66.0              png_0.1-8                  
## [103] spatstat.univar_3.1-6       rstudioapi_0.16.0          
## [105] reshape2_1.4.4              rjson_0.2.23               
## [107] curl_7.0.0                  nlme_3.1-164               
## [109] cachem_1.1.0                zoo_1.8-15                 
## [111] GlobalOptions_0.1.3         stringr_1.5.1              
## [113] KernSmooth_2.23-24          parallel_4.4.1             
## [115] miniUI_0.1.2                vipor_0.4.7                
## [117] ggrastr_1.0.2               pillar_1.9.0               
## [119] vctrs_0.6.5                 RANN_2.6.2                 
## [121] promises_1.5.0              stringfish_0.18.0          
## [123] xtable_1.8-8                cluster_2.1.6              
## [125] beeswarm_0.4.0              evaluate_1.0.5             
## [127] cli_3.6.5                   compiler_4.4.1             
## [129] rlang_1.1.7                 crayon_1.5.2               
## [131] future.apply_1.11.2         labeling_0.4.3             
## [133] plyr_1.8.9                  ggbeeswarm_0.7.3           
## [135] stringi_1.8.4               viridisLite_0.4.2          
## [137] deldir_2.0-4                BiocParallel_1.40.2        
## [139] lazyeval_0.2.2              spatstat.geom_3.7-0        
## [141] Matrix_1.7-0                RcppHNSW_0.6.0             
## [143] future_1.33.2               statmod_1.5.1              
## [145] shiny_1.13.0                SummarizedExperiment_1.36.0
## [147] ROCR_1.0-12                 igraph_2.2.2               
## [149] RcppParallel_5.1.8          bslib_0.7.0