Overview

Before running NicheNet, we need to properly characterise the niche populations identified in the CellChat analysis. This script answers:

  • What are these cells? (marker panels from CNS border / niche literature)
  • How do they differ between CNS and BM? (tissue-specific DE)
  • What functional programmes are active? (module scoring)

Populations: Stroma, Macrophages, Microglia-like macrophages

Setup

library(Seurat)
library(qs2)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)
library(patchwork)
library(ComplexHeatmap)
library(gprofiler2)
library(circlize)
library(knitr)
library(UCell)

base_path <- "/exports/eddie/scratch/aduguid3"
cache_dir <- file.path(base_path, "harmony_clustering")
obj <- qs_read(file.path(cache_dir, "12_combined_seurat.qs"))
DefaultAssay(obj) <- "originalexp"
obj <- NormalizeData(obj, verbose = FALSE)

cat("Total cells:", ncol(obj), "\n")
## Total cells: 109322
cat("\nCell types:\n")
## 
## Cell types:
print(table(obj$cell_annotation))
## 
##                      B-ALL                  Basophils 
##                      91623                        555 
##                  CD8_Naive    CD8_Teff (unclassified) 
##                       1768                         80 
##                    CD8_Tex                   CD8_Tpex 
##                        284                        143 
##        CD8+ NKT-like cells      Effector CD4+ T cells 
##                        288                        814 
##                Macrophages Microglia-like macrophages 
##                       2031                       1540 
##    Myeloid Dendritic cells              Naive B cells 
##                        117                         97 
##      Natural killer  cells                Neutrophils 
##                        979                       1145 
##                   NKT-like    Non-classical monocytes 
##                        388                        606 
##                   Other γδ             Plasma B cells 
##                        505                        844 
##                Pre-B cells           Progenitor cells 
##                       3098                        714 
##                     Stroma                      T_eff 
##                       1442                         66 
##                        Vγ4                     Vγ6Vδ4 
##                         72                        123
cat("\nBy tissue:\n")
## 
## By tissue:
print(table(obj$cell_annotation, obj$Tissue))
##                             
##                                 BM   CNS
##   B-ALL                      44606 47017
##   Basophils                    523    32
##   CD8_Naive                   1639   129
##   CD8_Teff (unclassified)       49    31
##   CD8_Tex                      183   101
##   CD8_Tpex                     113    30
##   CD8+ NKT-like cells          173   115
##   Effector CD4+ T cells        631   183
##   Macrophages                 1127   904
##   Microglia-like macrophages    64  1476
##   Myeloid Dendritic cells       76    41
##   Naive B cells                 94     3
##   Natural killer  cells        887    92
##   Neutrophils                 1103    42
##   NKT-like                     219   169
##   Non-classical monocytes      356   250
##   Other γδ                     253   252
##   Plasma B cells               754    90
##   Pre-B cells                 2893   205
##   Progenitor cells             709     5
##   Stroma                       719   723
##   T_eff                         32    34
##   Vγ4                           40    32
##   Vγ6Vδ4                         2   121

1. Global Niche UMAP

1a — Compute UMAP

niche_types <- c("Stroma", "Macrophages", "Microglia-like macrophages")
niche <- subset(obj, cell_annotation %in% niche_types)
cat("Niche cells:", ncol(niche), "\n")
## Niche cells: 5013
print(table(niche$cell_annotation, niche$Tissue))
##                             
##                                BM  CNS
##   Macrophages                1127  904
##   Microglia-like macrophages   64 1476
##   Stroma                      719  723
niche <- NormalizeData(niche, verbose = FALSE)
niche <- FindVariableFeatures(niche, nfeatures = 2000, verbose = FALSE)
niche <- ScaleData(niche, verbose = FALSE)
niche <- RunPCA(niche, npcs = 20, verbose = FALSE)
niche <- RunUMAP(niche, dims = 1:15, verbose = FALSE)

cat("Niche UMAP computed\n")
## Niche UMAP computed

1b — Overview Plots

p1 <- DimPlot(niche, group.by = "cell_annotation",
              cols = c("Stroma" = "#228B22", "Macrophages" = "#FF7F00",
                        "Microglia-like macrophages" = "#984EA3"),
              label = TRUE, label.size = 4) +
  ggtitle("Niche Populations")

p2 <- DimPlot(niche, group.by = "Tissue",
              cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
  ggtitle("By Tissue")

p3 <- DimPlot(niche, group.by = "cell_annotation",
              split.by = "Tissue",
              cols = c("Stroma" = "#228B22", "Macrophages" = "#FF7F00",
                        "Microglia-like macrophages" = "#984EA3")) +
  ggtitle("Split by Tissue") + NoLegend()

(p1 + p2) / p3

1c — Key Markers on Niche UMAP

niche_markers <- c("Col1a1", "Pdgfra", "Pdgfrb",        # stroma
                    "Adgre1", "Cd68", "Mrc1",             # macrophage
                    "P2ry12", "Tmem119", "Hexb",           # microglia
                    "Trem2", "Apoe", "Cxcl12")             # functional
niche_markers_present <- niche_markers[niche_markers %in% rownames(niche)]

FeaturePlot(niche, features = niche_markers_present,
            order = TRUE, ncol = 4,
            cols = c("lightgrey", "darkred")) &
  theme(plot.title = element_text(face = "bold.italic", size = 10))


2. Stroma Subclustering

2a — Subcluster Stroma

stroma <- subset(niche, cell_annotation == "Stroma")
cat("Stroma cells:", ncol(stroma), "\n")
## Stroma cells: 1442
print(table(stroma$Tissue))
## 
##  BM CNS 
## 719 723
stroma <- NormalizeData(stroma, verbose = FALSE)
stroma <- FindVariableFeatures(stroma, nfeatures = 2000, verbose = FALSE)
stroma <- ScaleData(stroma, verbose = FALSE)
stroma <- RunPCA(stroma, npcs = 15, verbose = FALSE)
stroma <- RunUMAP(stroma, dims = 1:10, verbose = FALSE)
stroma <- FindNeighbors(stroma, dims = 1:10, verbose = FALSE)

# Try a few resolutions
for (res in c(0.3, 0.5, 0.8)) {
  stroma <- FindClusters(stroma, resolution = res, verbose = FALSE)
  cat("Resolution", res, "->", length(unique(stroma@meta.data[[paste0("originalexp_snn_res.", res)]])), "clusters\n")
}
## Resolution 0.3 -> 6 clusters
## Resolution 0.5 -> 9 clusters
## Resolution 0.8 -> 11 clusters
# Default to res 0.5 — adjust if needed
stroma <- FindClusters(stroma, resolution = 0.5, verbose = FALSE)
Idents(stroma) <- "seurat_clusters"
p1 <- DimPlot(stroma, group.by = "seurat_clusters",
              label = TRUE, label.size = 5) +
  ggtitle("Stroma Subclusters")

p2 <- DimPlot(stroma, group.by = "Tissue",
              cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
  ggtitle("By Tissue")

p1 + p2

stroma_comp <- stroma@meta.data %>%
  count(seurat_clusters, Tissue) %>%
  group_by(seurat_clusters) %>%
  mutate(total = sum(n), pct = round(n / total * 100, 1)) %>%
  ungroup()

cat("Stroma cluster composition:\n")
## Stroma cluster composition:
print(stroma_comp %>% pivot_wider(id_cols = c(seurat_clusters, total),
                                    names_from = Tissue,
                                    values_from = n, values_fill = 0))
## # A tibble: 9 × 4
##   seurat_clusters total   CNS    BM
##   <fct>           <int> <int> <int>
## 1 0                 294   294     0
## 2 1                 284     7   277
## 3 2                 275   275     0
## 4 3                 161     2   159
## 5 4                 122   122     0
## 6 5                 122     4   118
## 7 6                 111     0   111
## 8 7                  55     1    54
## 9 8                  18    18     0

2b — Stroma Cluster Markers

stroma_markers <- FindAllMarkers(stroma, only.pos = TRUE,
                                  min.pct = 0.2, logfc.threshold = 0.5,
                                  verbose = FALSE)
cat("Total marker genes:", nrow(stroma_markers), "\n")
## Total marker genes: 19694
stroma_markers %>%
  group_by(cluster) %>%
  slice_max(avg_log2FC, n = 10) %>%
  select(cluster, gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  kable(digits = c(NA, NA, 2, 2, 2, 3), caption = "Top 10 markers per stroma subcluster")
Top 10 markers per stroma subcluster
cluster gene avg_log2FC pct.1 pct.2 p_val_adj
0 Ptgds 4.09 0.34 0.09 0
0 Penk 3.35 0.65 0.19 0
0 Crym 2.82 0.25 0.10 0
0 Asgr1 2.68 0.29 0.09 0
0 Igfbp2 2.61 0.45 0.15 0
0 Reg3g 2.54 0.28 0.08 0
0 Ptn 2.49 0.88 0.32 0
0 Crabp2 2.46 0.30 0.09 0
0 Apod 2.26 0.98 0.40 0
0 Atp6v0e2 2.25 0.29 0.14 0
1 Ibsp 11.99 0.48 0.00 0
1 Ebf3 10.23 0.67 0.00 0
1 Shox2 9.40 0.35 0.00 0
1 Cpa6 9.34 0.35 0.00 0
1 Dlx5 9.21 0.26 0.00 0
1 Gdpd2 9.04 0.60 0.00 0
1 Hoxc9 9.00 0.24 0.00 0
1 Prr16 8.94 0.47 0.01 0
1 Inhbb 8.93 0.34 0.00 0
1 Csmd1 8.91 0.88 0.06 0
2 Chil1 4.44 0.35 0.04 0
2 Gm31641 3.98 0.32 0.03 0
2 Epha3 3.83 0.79 0.10 0
2 Mmp3 3.78 0.33 0.04 0
2 Itga8 3.73 0.32 0.02 0
2 Cemip 3.64 0.75 0.10 0
2 Zfp385b 3.61 0.21 0.01 0
2 Nox4 3.54 0.58 0.04 0
2 Frmpd4 3.53 0.40 0.04 0
2 Adam33 3.41 0.46 0.03 0
3 Gm30948 10.29 0.22 0.00 0
3 Gm32196 9.17 0.32 0.00 0
3 Siglech 9.14 0.49 0.00 0
3 Rag2 9.02 0.36 0.00 0
3 Klrd1 8.67 0.58 0.00 0
3 Gm34680 8.53 0.52 0.00 0
3 Cox6a2 8.53 0.81 0.01 0
3 Gm56536 8.13 0.33 0.00 0
3 Blk 7.82 0.37 0.00 0
3 Col19a1 7.60 0.24 0.00 0
4 Chrm3 4.07 0.75 0.09 0
4 Nlgn1 3.99 0.39 0.05 0
4 Lrrc38 3.69 0.22 0.02 0
4 Nalcn 3.65 0.39 0.03 0
4 A330076H08Rik 3.60 0.26 0.04 0
4 Kcnb2 3.56 0.72 0.10 0
4 Rtl4 3.56 0.35 0.05 0
4 Kctd8 3.43 0.51 0.10 0
4 Susd4 3.40 0.20 0.02 0
4 Slc9b1 3.38 0.26 0.04 0
5 Gp9 6.64 0.57 0.04 0
5 Parvb 6.59 0.23 0.02 0
5 Gp1ba 6.33 0.37 0.01 0
5 Treml1 6.30 0.43 0.02 0
5 F2rl2 6.14 0.43 0.02 0
5 Gm15915 5.61 0.51 0.02 0
5 Gp5 5.56 0.51 0.02 0
5 Il1b 5.55 0.24 0.01 0
5 Pf4 5.42 0.57 0.04 0
5 Mmrn1 4.98 0.37 0.02 0
6 Gm56930 6.54 0.44 0.01 0
6 Cd200r4 5.70 0.21 0.00 0
6 Clec4e 5.47 0.58 0.03 0
6 Cxcr2 5.30 0.26 0.01 0
6 Lincred1 5.27 0.21 0.01 0
6 Ctsg 5.10 0.47 0.04 0
6 Mpo 5.05 0.51 0.06 0
6 Csf3r 4.99 0.61 0.04 0
6 Prtn3 4.65 0.98 0.19 0
6 Gm34703 4.63 0.39 0.03 0
7 Ankle1 8.39 0.54 0.00 0
7 Pif1 7.86 0.46 0.00 0
7 Ska1 7.27 0.60 0.01 0
7 Mxd3 7.25 0.42 0.00 0
7 Fam83d 7.23 0.38 0.00 0
7 Ttk 7.20 0.69 0.01 0
7 Pimreg 7.01 0.74 0.01 0
7 Kif2c 6.98 0.64 0.01 0
7 Kif18b 6.75 0.76 0.01 0
7 H3c3 6.72 0.60 0.01 0
8 2900040C04Rik 14.77 0.83 0.00 0
8 Ttr 13.81 0.89 0.02 0
8 Prr32 12.87 0.83 0.00 0
8 Ppp1r1b 12.87 0.94 0.00 0
8 Otx2 12.54 0.83 0.00 0
8 Kl 12.53 0.72 0.00 0
8 Foxj1 11.89 0.67 0.00 0
8 Vat1l 11.51 0.78 0.00 0
8 Pcp4 11.48 0.61 0.00 0
8 Cox8b 11.29 0.33 0.00 0
top_genes <- stroma_markers %>%
  group_by(cluster) %>%
  slice_max(avg_log2FC, n = 8) %>%
  pull(gene) %>% unique()

DoHeatmap(stroma, features = top_genes, size = 3,
          group.colors = scales::hue_pal()(length(unique(stroma$seurat_clusters)))) +
  theme(axis.text.y = element_text(face = "italic", size = 7)) +
  ggtitle("Stroma Subcluster Markers")

2c — Identity Marker Panels

stroma_id_genes <- list(
  "Meningeal_fibroblast" = c("Col1a1", "Col1a2", "Col3a1", "Dcn", "Lum",
                               "Pdgfra", "Fbln1", "Mgp"),
  "Pericyte" = c("Pdgfrb", "Rgs5", "Acta2", "Des", "Notch3",
                   "Kcnj8", "Abcc9", "Vtn"),
  "Endothelial" = c("Pecam1", "Cdh5", "Flt1", "Kdr", "Cldn5",
                      "Emcn", "Plvap"),
  "Leptomeningeal" = c("Slc47a1", "Aldh1a2", "Crabp2", "Ptgds", "Slc6a13"),
  "CAF_like" = c("Acta2", "Fap", "Postn", "Tnc", "Mmp2", "Mmp14"),
  "Niche_support" = c("Cxcl12", "Cxcl16", "Kitl", "Ptn", "Mdk",
                        "Igf1", "Wnt5a", "Il33"),
  "Immunomodulatory" = c("Cd274", "Pdcd1lg2", "Tgfb1", "Tgfb2",
                           "Nt5e", "Ptgs2")
)
all_stroma_id <- unique(unlist(stroma_id_genes))
present_stroma_id <- all_stroma_id[all_stroma_id %in% rownames(stroma)]
cat("Identity genes present:", length(present_stroma_id), "/", length(all_stroma_id), "\n")
## Identity genes present: 47 / 47
DotPlot(stroma, features = present_stroma_id,
        cols = c("lightgrey", "darkred"), dot.scale = 5) +
  RotatedAxis() +
  ggtitle("Stroma Identity Markers by Subcluster") +
  theme(axis.text.x = element_text(face = "italic", size = 7))

key_stroma <- c("Col1a1", "Pdgfra", "Pdgfrb", "Rgs5", "Pecam1", "Cdh5",
                 "Cxcl12", "Cxcl16", "Ptn", "Mdk", "Kitl", "Tgfb1")
key_stroma_present <- key_stroma[key_stroma %in% rownames(stroma)]

FeaturePlot(stroma, features = key_stroma_present,
            order = TRUE, ncol = 4,
            cols = c("lightgrey", "darkred")) &
  theme(plot.title = element_text(face = "bold.italic", size = 10))

2d — Stroma Functional Scoring

stroma_modules <- list(
  ECM_remodelling = c("Col1a1", "Col1a2", "Col3a1", "Fn1", "Tnc",
                       "Postn", "Mmp2", "Mmp14", "Timp1", "Lox"),
  Growth_support = c("Ptn", "Mdk", "Igf1", "Igf2", "Fgf2",
                      "Kitl", "Wnt5a", "Pdgfa"),
  Niche_retention = c("Cxcl12", "Cxcl16", "Vcam1", "Icam1",
                        "Fn1", "Lama1", "Lamb1"),
  Angiogenic = c("Vegfa", "Vegfb", "Angpt1", "Angpt2",
                   "Pdgfb", "Hif1a", "Flt1"),
  Immunomodulatory = c("Tgfb1", "Tgfb2", "Il33", "Cd274",
                         "Pdcd1lg2", "Nt5e", "Ptgs2"),
  CAF_programme = c("Acta2", "Fap", "Postn", "Tnc", "Fn1",
                      "Mmp2", "Mmp9", "Mmp14")
)

stroma_mods_filt <- lapply(stroma_modules, function(g) g[g %in% rownames(stroma)])
stroma_mods_filt <- stroma_mods_filt[sapply(stroma_mods_filt, length) >= 3]

stroma <- AddModuleScore_UCell(stroma, features = stroma_mods_filt, name = "_UCell")
stroma_score_cols <- grep("_UCell$", colnames(stroma@meta.data), value = TRUE)

VlnPlot(stroma, features = stroma_score_cols,
        group.by = "seurat_clusters", pt.size = 0,
        ncol = 3) &
  theme(axis.text.x = element_text(size = 10))

# Split by tissue for each module
VlnPlot(stroma, features = stroma_score_cols,
        group.by = "seurat_clusters", split.by = "Tissue",
        pt.size = 0, ncol = 3,
        cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
  theme(axis.text.x = element_text(size = 10))


3. Myeloid Subclustering

Pool macrophages and CNS microglia-like into a single myeloid object. BM microglia-like cells are excluded (CNS-restricted population, few if any BM cells).

3a — Pool and Subcluster

# Macrophages from both tissues + microglia-like from CNS only
mac_cells <- WhichCells(niche, expression = cell_annotation == "Macrophages")
mg_cns_cells <- WhichCells(niche, expression = cell_annotation == "Microglia-like macrophages" & Tissue == "CNS")

cat("BM microglia-like excluded:",
    sum(niche$cell_annotation == "Microglia-like macrophages" & niche$Tissue == "BM"), "cells\n")
## BM microglia-like excluded: 64 cells
myeloid <- subset(niche, cells = c(mac_cells, mg_cns_cells))

# Preserve original annotation for later comparison
myeloid$original_annotation <- myeloid$cell_annotation

cat("Myeloid pool:", ncol(myeloid), "cells\n")
## Myeloid pool: 3507 cells
print(table(myeloid$original_annotation, myeloid$Tissue))
##                             
##                                BM  CNS
##   Macrophages                1127  904
##   Microglia-like macrophages    0 1476
myeloid <- NormalizeData(myeloid, verbose = FALSE)
myeloid <- FindVariableFeatures(myeloid, nfeatures = 2000, verbose = FALSE)
myeloid <- ScaleData(myeloid, verbose = FALSE)
myeloid <- RunPCA(myeloid, npcs = 20, verbose = FALSE)
myeloid <- RunUMAP(myeloid, dims = 1:15, verbose = FALSE)
myeloid <- FindNeighbors(myeloid, dims = 1:15, verbose = FALSE)

# Multiple resolutions
for (res in c(0.3, 0.5, 0.8)) {
  myeloid <- FindClusters(myeloid, resolution = res, verbose = FALSE)
  cat("Resolution", res, "->",
      length(unique(myeloid@meta.data[[paste0("originalexp_snn_res.", res)]])), "clusters\n")
}
## Resolution 0.3 -> 9 clusters
## Resolution 0.5 -> 12 clusters
## Resolution 0.8 -> 13 clusters
myeloid <- FindClusters(myeloid, resolution = 0.5, verbose = FALSE)
Idents(myeloid) <- "seurat_clusters"
p1 <- DimPlot(myeloid, group.by = "seurat_clusters",
              label = TRUE, label.size = 5) +
  ggtitle("Myeloid Subclusters")

p2 <- DimPlot(myeloid, group.by = "Tissue",
              cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
  ggtitle("By Tissue")

p3 <- DimPlot(myeloid, group.by = "original_annotation",
              cols = c("Macrophages" = "#FF7F00", "Microglia-like macrophages" = "#984EA3")) +
  ggtitle("Original Annotation")

p4 <- DimPlot(myeloid, group.by = "original_annotation",
              split.by = "Tissue",
              cols = c("Macrophages" = "#FF7F00", "Microglia-like macrophages" = "#984EA3")) +
  ggtitle("Annotation x Tissue") + NoLegend()

(p1 + p2) / (p3 + p4)

myeloid_comp <- myeloid@meta.data %>%
  count(seurat_clusters, original_annotation, Tissue) %>%
  group_by(seurat_clusters) %>%
  mutate(total = sum(n), pct = round(n / total * 100, 1)) %>%
  ungroup()

cat("Myeloid cluster composition:\n")
## Myeloid cluster composition:
print(myeloid_comp %>% pivot_wider(id_cols = c(seurat_clusters, total),
                                     names_from = c(original_annotation, Tissue),
                                     values_from = n, values_fill = 0))
## # A tibble: 12 × 5
##    seurat_clusters total Macrophages_CNS Microglia-like macroph…¹ Macrophages_BM
##    <fct>           <int>           <int>                    <int>          <int>
##  1 0                1169               1                     1168              0
##  2 1                 582              76                        7            499
##  3 2                 565             496                       48             21
##  4 3                 433              27                        0            406
##  5 4                 202              64                       18            120
##  6 5                 128             115                        8              5
##  7 6                 115               4                      111              0
##  8 7                 108             102                        6              0
##  9 8                  73               3                       70              0
## 10 9                  63               3                        0             60
## 11 10                 43               4                       39              0
## 12 11                 26               9                        1             16
## # ℹ abbreviated name: ¹​`Microglia-like macrophages_CNS`

3b — Myeloid Cluster Markers

myeloid_markers <- FindAllMarkers(myeloid, only.pos = TRUE,
                                    min.pct = 0.2, logfc.threshold = 0.5,
                                    verbose = FALSE)
cat("Total marker genes:", nrow(myeloid_markers), "\n")
## Total marker genes: 12288
myeloid_markers %>%
  group_by(cluster) %>%
  slice_max(avg_log2FC, n = 10) %>%
  select(cluster, gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  kable(digits = c(NA, NA, 2, 2, 2, 3), caption = "Top 10 markers per myeloid subcluster")
Top 10 markers per myeloid subcluster
cluster gene avg_log2FC pct.1 pct.2 p_val_adj
0 Upk1b 4.38 0.22 0.01 0.000
0 Slc2a5 4.27 0.56 0.04 0.000
0 Thrsp 4.26 0.21 0.01 0.000
0 Tmem100 4.13 0.31 0.02 0.000
0 Ecscr 4.05 0.58 0.04 0.000
0 Jam2 3.96 0.28 0.02 0.000
0 Tmem119 3.95 0.95 0.09 0.000
0 P2ry12 3.95 1.00 0.22 0.000
0 Gal3st4 3.86 0.48 0.04 0.000
0 Adora3 3.84 0.47 0.04 0.000
1 H2-Aa 3.67 0.40 0.06 0.000
1 H2-Eb1 3.66 0.32 0.04 0.000
1 H2-Ab1 3.47 0.42 0.10 0.000
1 Cd74 3.32 0.53 0.23 0.000
1 Gpnmb 2.96 0.44 0.10 0.000
1 Atp6v0d2 2.70 0.60 0.14 0.000
1 Pla2g2d 2.65 0.26 0.07 0.000
1 Anpep 2.64 0.38 0.08 0.000
1 Acp5 2.47 0.66 0.13 0.000
1 Cd4 2.38 0.46 0.12 0.000
2 Akr1b8 4.67 0.44 0.04 0.000
2 Srxn1 3.96 0.33 0.05 0.000
2 Clec4d 3.79 0.50 0.06 0.000
2 Mt2 3.53 0.68 0.12 0.000
2 Maoa 3.49 0.22 0.03 0.000
2 Prdx1 3.17 1.00 0.75 0.000
2 Cbr2 3.04 0.83 0.16 0.000
2 Cd209f 2.99 0.25 0.04 0.000
2 Chchd10 2.85 0.31 0.04 0.000
2 Ccl8 2.78 0.68 0.15 0.000
3 Itgad 4.32 0.60 0.04 0.000
3 Ubd 4.20 0.25 0.01 0.000
3 Ccr3 4.14 0.58 0.04 0.000
3 Kcna2 3.98 0.20 0.01 0.000
3 Paqr9 3.90 0.27 0.02 0.000
3 Ccdc148 3.90 0.28 0.02 0.000
3 Tmem26 3.47 0.24 0.03 0.000
3 Cd5l 3.43 0.98 0.15 0.000
3 Pira2 3.32 0.28 0.03 0.000
3 Mrap 3.30 0.46 0.06 0.000
4 Aurka 7.42 0.33 0.00 0.000
4 Nusap1 7.35 0.46 0.00 0.000
4 Prc1 7.09 0.54 0.00 0.000
4 Pimreg 6.74 0.20 0.00 0.000
4 H2bc14 6.67 0.24 0.00 0.000
4 Plk1 6.61 0.40 0.00 0.000
4 H1f5 6.56 0.67 0.04 0.000
4 Birc5 6.53 0.68 0.01 0.000
4 Ube2c 6.50 0.63 0.02 0.000
4 Ckap2l 6.49 0.45 0.01 0.000
5 Pde7b 6.78 0.33 0.01 0.000
5 Gm56990 6.76 0.21 0.00 0.000
5 Adam33 6.15 0.32 0.01 0.000
5 Klra2 5.98 0.23 0.01 0.000
5 F630028O10Rik 5.12 0.76 0.04 0.000
5 Gm56663 4.65 0.31 0.02 0.000
5 Ccr1 4.65 0.71 0.06 0.000
5 Cp 4.47 0.63 0.06 0.000
5 Trps1 4.44 0.60 0.06 0.000
5 Oas2 4.18 0.29 0.02 0.000
6 Ifi206 4.77 0.30 0.01 0.000
6 Ifit3b 4.28 0.34 0.01 0.000
6 Mx1 4.21 0.51 0.03 0.000
6 Ifit2 4.09 0.57 0.04 0.000
6 Ifit1 4.02 0.31 0.02 0.000
6 Usp18 4.02 0.44 0.04 0.000
6 Oasl2 3.90 0.83 0.08 0.000
6 Ms4a4c 3.89 0.30 0.03 0.000
6 Rsad2 3.82 0.30 0.02 0.000
6 Zbp1 3.81 0.44 0.04 0.000
7 Ptgds 5.15 0.34 0.02 0.000
7 C4b 4.29 0.48 0.06 0.000
7 Gpx3 4.20 0.65 0.09 0.000
7 Igfbp4 4.19 0.79 0.15 0.000
7 Mgl2 3.81 0.29 0.03 0.000
7 Ednrb 3.33 0.35 0.05 0.000
7 Ccl24 3.20 0.38 0.06 0.000
7 Lyve1 3.19 0.82 0.10 0.000
7 Cd163 3.07 0.81 0.19 0.000
7 Gas7 3.07 0.45 0.10 0.000
8 Gm32006 5.19 0.20 0.01 0.000
8 Setbp1 4.11 0.22 0.04 0.000
8 Cables1 4.10 0.62 0.12 0.000
8 Myo18b 4.05 0.22 0.03 0.000
8 Fhit 3.93 0.69 0.20 0.000
8 Khdrbs3 3.87 0.27 0.05 0.000
8 Plcl1 3.84 0.89 0.30 0.000
8 Smyd3 3.83 0.69 0.17 0.000
8 Vis1 3.79 0.63 0.13 0.000
8 1700028E10Rik 3.78 0.26 0.05 0.000
9 Aqp1 12.62 0.36 0.00 0.000
9 Cspg4 12.22 0.44 0.00 0.000
9 Fn1 10.40 0.67 0.00 0.000
9 Dpysl3 10.13 0.52 0.00 0.000
9 Serpinh1 9.17 0.25 0.00 0.000
9 Tppp3 9.04 0.73 0.00 0.000
9 Mamdc2 9.02 0.21 0.00 0.000
9 Vcan 8.36 0.60 0.00 0.000
9 Atp2b4 8.32 0.40 0.00 0.000
9 Nherf2 7.54 0.64 0.00 0.000
10 Cst7 5.03 0.49 0.03 0.000
10 Chst1 3.20 0.21 0.04 0.000
10 Ctse 2.44 0.26 0.06 0.001
10 Hcar2 2.35 0.21 0.04 0.006
10 Bicd2 2.32 0.23 0.09 1.000
10 Syngr1 2.28 0.93 0.38 0.000
10 Gm38843 2.21 0.28 0.07 0.003
10 Cd52 2.19 0.93 0.54 0.000
10 Tent5c 2.03 0.26 0.12 1.000
10 Hcst 2.00 0.28 0.11 1.000
11 Cxcl2 8.53 0.38 0.00 0.000
11 Gadd45b 6.66 0.73 0.07 0.000
11 Dusp1 6.03 0.81 0.06 0.000
11 Eno2 5.99 0.23 0.00 0.000
11 Tm4sf19 5.82 0.23 0.00 0.000
11 Tnfrsf12a 5.76 0.50 0.02 0.000
11 St18 5.70 0.23 0.01 0.000
11 Gdf15 5.52 0.77 0.12 0.000
11 Tnf 5.45 0.65 0.05 0.000
11 Hbegf 5.39 0.27 0.02 0.000
top_myeloid <- myeloid_markers %>%
  group_by(cluster) %>%
  slice_max(avg_log2FC, n = 8) %>%
  pull(gene) %>% unique()

DoHeatmap(myeloid, features = top_myeloid, size = 3) +
  theme(axis.text.y = element_text(face = "italic", size = 7)) +
  ggtitle("Myeloid Subcluster Markers")

3c — Identity Marker Panels

myeloid_id_genes <- c(
  # Pan-macrophage
  "Adgre1", "Cd68", "Csf1r", "Fcgr1",
  # Homeostatic microglia
  "P2ry12", "Tmem119", "Hexb", "Siglech", "Sall1",
  # BAM
  "Mrc1", "Lyve1", "Cd163", "Siglec1", "Pf4", "Folr2",
  # Monocyte-derived
  "Ly6c2", "Ccr2", "S100a4", "Sell", "Plac8",
  # Tissue-resident
  "Timd4", "Cbr2",
  # LAM / TREM2
  "Trem2", "Apoe", "Axl", "Cd9", "Spp1", "Gpnmb", "Lpl",
  # Inflammatory
  "Tnf", "Il1b", "Nos2", "Ccl2", "Cd86",
  # Tolerogenic
  "Arg1", "Tgfb1", "Il10", "Cd274",
  # Antigen-presenting
  "H2-Eb1", "H2-Ab1", "Cd74", "Ciita",
  # Complement
  "C1qa", "C1qb", "C1qc"
)
myeloid_id_present <- myeloid_id_genes[myeloid_id_genes %in% rownames(myeloid)]

DotPlot(myeloid, features = myeloid_id_present,
        cols = c("lightgrey", "darkred"), dot.scale = 5) +
  RotatedAxis() +
  ggtitle("Myeloid Identity Markers by Subcluster") +
  theme(axis.text.x = element_text(face = "italic", size = 7))

key_myeloid <- c("Adgre1", "Cd68",
                  "P2ry12", "Tmem119", "Hexb", "Sall1",
                  "Mrc1", "Lyve1", "Cd163",
                  "Trem2", "Apoe", "Axl",
                  "Ly6c2", "Ccr2",
                  "C1qa", "Tgfb1", "H2-Eb1", "Cd274")
key_myeloid_present <- key_myeloid[key_myeloid %in% rownames(myeloid)]

FeaturePlot(myeloid, features = key_myeloid_present,
            order = TRUE, ncol = 4,
            cols = c("lightgrey", "darkred")) &
  theme(plot.title = element_text(face = "bold.italic", size = 10))

3d — Myeloid Functional Scoring

myeloid_modules <- list(
  Tolerogenic = c("Tgfb1", "Tgfb2", "Il10", "Arg1", "Mrc1", "Cd163",
                   "Ptgs2", "Nt5e", "Cd274", "Vegfa", "Socs1", "Socs3"),
  Pro_inflammatory = c("Tnf", "Il1b", "Il6", "Nos2", "Ccl2", "Ccl3",
                         "Ccl4", "Cxcl10", "Cd86", "Cd80", "Irf5", "Stat1"),
  Antigen_presentation = c("H2-Eb1", "H2-Ab1", "H2-Aa", "Cd74", "Ciita",
                             "Cd80", "Cd86", "Tap1", "Tap2", "B2m"),
  TREM2_programme = c("Trem2", "Tyrobp", "Apoe", "Axl", "Cd9", "Spp1",
                       "Lpl", "Cst7", "Gpnmb", "Itgax", "Clec7a"),
  Complement = c("C1qa", "C1qb", "C1qc", "C3", "C4b", "Cfh", "Cfp"),
  Homeostatic_MG = c("P2ry12", "Tmem119", "Hexb", "Siglech", "Sall1",
                       "Cx3cr1", "Csf1r"),
  BAM_signature = c("Mrc1", "Lyve1", "Cd163", "Siglec1", "Pf4",
                      "Cbr2", "Ms4a7", "Folr2"),
  Phagocytosis = c("Mertk", "Axl", "Tyro3", "Cd36", "Megf10",
                     "Marco", "Scarb1")
)

myeloid_mods_filt <- lapply(myeloid_modules, function(g) g[g %in% rownames(myeloid)])
myeloid_mods_filt <- myeloid_mods_filt[sapply(myeloid_mods_filt, length) >= 3]

myeloid <- AddModuleScore_UCell(myeloid, features = myeloid_mods_filt, name = "_UCell")
myeloid_score_cols <- grep("_UCell$", colnames(myeloid@meta.data), value = TRUE)

VlnPlot(myeloid, features = myeloid_score_cols,
        group.by = "seurat_clusters", pt.size = 0,
        ncol = 3) &
  theme(axis.text.x = element_text(size = 10))

# Compare original macrophage vs microglia-like labels within CNS
myeloid_cns <- subset(myeloid, Tissue == "CNS")

VlnPlot(myeloid_cns, features = myeloid_score_cols,
        group.by = "original_annotation", pt.size = 0,
        ncol = 3,
        cols = c("Macrophages" = "#FF7F00", "Microglia-like macrophages" = "#984EA3")) &
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))


4. Microglia Validation

Key question: are these “microglia-like” cells true parenchymal microglia (which should NOT be in a dural/meningeal dataset) or border-associated macrophages (BAMs) / CNS-associated macrophages (CAMs) that share some microglial features?

4a — Core Microglia Signature

# Parenchymal microglia require: P2ry12, Tmem119, Hexb, Siglech, Sall1
# BAMs share Cx3cr1/Csf1r but LACK the above core set
# BAMs express: Mrc1 (CD206), Lyve1, Cd163, Siglec1, Pf4

mg_validation <- c(
  # Core parenchymal — must be HIGH for true microglia
  "P2ry12", "Tmem119", "Hexb", "Siglech", "Sall1",
  # BAM markers — expected in border-associated
  "Mrc1", "Lyve1", "Cd163", "Siglec1", "Pf4", "Folr2",
  # Shared (uninformative alone)
  "Cx3cr1", "Csf1r", "Adgre1",
  # DAM / activated microglia
  "Trem2", "Apoe", "Axl", "Cd9", "Spp1",
  # Perivascular macrophage
  "Cd36", "Msr1", "Ms4a7"
)
mg_val_present <- mg_validation[mg_validation %in% rownames(myeloid)]

# DotPlot across myeloid subclusters
DotPlot(myeloid, features = mg_val_present,
        cols = c("lightgrey", "darkred"), dot.scale = 5) +
  RotatedAxis() +
  ggtitle("Microglia vs BAM Validation — All Myeloid Subclusters") +
  theme(axis.text.x = element_text(face = "italic", size = 8))

4b — Microglia-like Cells Specifically

# Compare CNS macrophages vs CNS microglia-like on validation panel
myeloid_cns$annot <- myeloid_cns$original_annotation
Idents(myeloid_cns) <- "annot"

DotPlot(myeloid_cns, features = mg_val_present,
        cols = c("lightgrey", "darkred"), dot.scale = 6) +
  RotatedAxis() +
  ggtitle("CNS Macrophages vs Microglia-like — Validation Panel") +
  theme(axis.text.x = element_text(face = "italic", size = 8))

# Direct violin comparison for the most critical markers
critical_mg <- c("P2ry12", "Tmem119", "Hexb", "Sall1",
                  "Mrc1", "Lyve1", "Cd163", "Trem2",
                 "Siglech", "Fcrls", "Gpr34", "P2ry13")
critical_present <- critical_mg[critical_mg %in% rownames(myeloid_cns)]

VlnPlot(myeloid_cns, features = critical_present,
        group.by = "annot", pt.size = 0.1, ncol = 4,
        cols = c("Macrophages" = "#FF7F00", "Microglia-like macrophages" = "#984EA3")) &
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

4c — Microglia Score vs BAM Score

# Scatter: Homeostatic MG score vs BAM score per cell
# True microglia = high MG, low BAM; BAMs = low MG, high BAM

if (all(c("Homeostatic_MG_UCell", "BAM_signature_UCell") %in% colnames(myeloid@meta.data))) {
  
  myeloid@meta.data %>%
    filter(Tissue == "CNS") %>%
    ggplot(aes(x = BAM_signature_UCell, y = Homeostatic_MG_UCell,
               colour = original_annotation)) +
    geom_point(size = 0.5, alpha = 0.5) +
    scale_colour_manual(values = c("Macrophages" = "#FF7F00",
                                    "Microglia-like macrophages" = "#984EA3")) +
    geom_hline(yintercept = 0.15, linetype = "dashed", colour = "grey50") +
    geom_vline(xintercept = 0.15, linetype = "dashed", colour = "grey50") +
    theme_minimal(base_size = 12) +
    labs(title = "Homeostatic Microglia vs BAM Signature — CNS Myeloid",
         subtitle = "True microglia: top-left; BAMs: bottom-right",
         x = "BAM Signature Score (UCell)", y = "Homeostatic MG Score (UCell)",
         colour = "Original\nAnnotation")
}

4d — Interpretation Guide

cat("
=====================================================================
  MICROGLIA VALIDATION — INTERPRETATION
=====================================================================

  TRUE PARENCHYMAL MICROGLIA would show:
    + High P2ry12, Tmem119, Hexb, Siglech, Sall1
    - Low Mrc1, Lyve1, Cd163
    -> Unexpected in dural/meningeal dataset
    -> Would suggest parenchymal contamination

  BORDER-ASSOCIATED MACROPHAGES (BAMs) would show:
    - Low/absent P2ry12, Tmem119, Sall1
    + High Mrc1, Lyve1, Cd163, Pf4
    + May express Cx3cr1, Trem2 (shared)
    -> Expected in dural/meningeal border niche

  DAM/ACTIVATED MICROGLIA-LIKE would show:
    ~ Reduced P2ry12/Tmem119 (downregulated on activation)
    + High Trem2, Apoe, Axl, Cd9, Spp1
    -> Could be disease-associated transition state

=====================================================================
")
## 
## =====================================================================
##   MICROGLIA VALIDATION — INTERPRETATION
## =====================================================================
## 
##   TRUE PARENCHYMAL MICROGLIA would show:
##     + High P2ry12, Tmem119, Hexb, Siglech, Sall1
##     - Low Mrc1, Lyve1, Cd163
##     -> Unexpected in dural/meningeal dataset
##     -> Would suggest parenchymal contamination
## 
##   BORDER-ASSOCIATED MACROPHAGES (BAMs) would show:
##     - Low/absent P2ry12, Tmem119, Sall1
##     + High Mrc1, Lyve1, Cd163, Pf4
##     + May express Cx3cr1, Trem2 (shared)
##     -> Expected in dural/meningeal border niche
## 
##   DAM/ACTIVATED MICROGLIA-LIKE would show:
##     ~ Reduced P2ry12/Tmem119 (downregulated on activation)
##     + High Trem2, Apoe, Axl, Cd9, Spp1
##     -> Could be disease-associated transition state
## 
## =====================================================================

5. CNS vs BM Macrophage Comparison

5a — CNS Macrophage vs Microglia-like DE

Idents(myeloid_cns) <- "original_annotation"

de_mac_vs_mg <- FindMarkers(myeloid_cns,
                              ident.1 = "Microglia-like macrophages",
                              ident.2 = "Macrophages",
                              test.use = "wilcox",
                              min.pct = 0.1, logfc.threshold = 0)
de_mac_vs_mg$gene <- rownames(de_mac_vs_mg)

cat("CNS Microglia-like vs CNS Macrophages — significant DE genes:",
    sum(de_mac_vs_mg$p_val_adj < 0.05), "\n")
## CNS Microglia-like vs CNS Macrophages — significant DE genes: 4725
cat("  Microglia-like enriched (log2FC > 0.5):",
    sum(de_mac_vs_mg$avg_log2FC > 0.5 & de_mac_vs_mg$p_val_adj < 0.05), "\n")
##   Microglia-like enriched (log2FC > 0.5): 3197
cat("  Macrophage enriched (log2FC < -0.5):",
    sum(de_mac_vs_mg$avg_log2FC < -0.5 & de_mac_vs_mg$p_val_adj < 0.05), "\n")
##   Macrophage enriched (log2FC < -0.5): 1363
cat("=== Top 30 Microglia-like enriched genes ===\n")
## === Top 30 Microglia-like enriched genes ===
de_mac_vs_mg %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  arrange(desc(avg_log2FC)) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))
gene avg_log2FC pct.1 pct.2 p_val_adj
Sall1 Sall1 11.50 0.64 0.00 0
Sall3 Sall3 9.90 0.33 0.00 0
Jam2 Jam2 9.43 0.26 0.00 0
Upk1b Upk1b 9.33 0.19 0.00 0
Gal3st4 Gal3st4 9.15 0.44 0.00 0
Gm3739 Gm3739 9.06 0.20 0.00 0
Yes1 Yes1 8.99 0.19 0.00 0
H2-Oa H2-Oa 8.83 0.18 0.00 0
Gm33100 Gm33100 8.82 0.16 0.00 0
Asap3 Asap3 8.77 0.16 0.00 0
Gm43821 Gm43821 8.59 0.14 0.00 0
Itgb3 Itgb3 8.56 0.15 0.00 0
Klk8 Klk8 8.56 0.27 0.00 0
Naalad2 Naalad2 8.56 0.12 0.00 0
Ecscr Ecscr 8.43 0.52 0.00 0
Gm41071 Gm41071 8.37 0.14 0.00 0
Sdk1 Sdk1 8.24 0.24 0.00 0
Csmd3 Csmd3 8.08 0.85 0.01 0
Gm16118 Gm16118 8.01 0.11 0.00 0
A830008E24Rik A830008E24Rik 8.00 0.50 0.00 0
B3galt5 B3galt5 7.98 0.11 0.00 0
Kcnma1 Kcnma1 7.96 0.25 0.00 0
6030468B19Rik 6030468B19Rik 7.92 0.10 0.00 0
Gp9 Gp9 7.92 0.15 0.00 0
Cd34 Cd34 7.89 0.46 0.00 0
Kcnd1 Kcnd1 7.77 0.10 0.00 0
Slc2a5 Slc2a5 7.74 0.50 0.00 0
Adamts16 Adamts16 7.73 0.11 0.00 0
Nav3 Nav3 7.51 0.82 0.01 0
Cacnb2 Cacnb2 7.48 0.54 0.00 0
cat("\n=== Top 30 Macrophage enriched genes ===\n")
## 
## === Top 30 Macrophage enriched genes ===
de_mac_vs_mg %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  arrange(avg_log2FC) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))
gene avg_log2FC pct.1 pct.2 p_val_adj
Acp5 Acp5 -6.28 0.00 0.13 0
Mgl2 Mgl2 -6.11 0.00 0.14 0
Clec10a Clec10a -6.08 0.00 0.27 0
Rgs18 Rgs18 -6.07 0.00 0.13 0
Nxpe5 Nxpe5 -6.01 0.00 0.14 0
Cd4 Cd4 -5.92 0.00 0.13 0
Timd4 Timd4 -5.88 0.00 0.13 0
Fcna Fcna -5.87 0.01 0.50 0
Lyve1 Lyve1 -5.79 0.01 0.38 0
Cd38 Cd38 -5.70 0.01 0.40 0
Mrc1 Mrc1 -5.55 0.06 0.66 0
Cfp Cfp -5.47 0.03 0.56 0
Colec12 Colec12 -5.47 0.00 0.17 0
Cd163 Cd163 -5.45 0.01 0.42 0
Cmbl Cmbl -5.34 0.00 0.14 0
Ccl7 Ccl7 -5.24 0.02 0.28 0
Ccl8 Ccl8 -5.19 0.02 0.53 0
Ms4a4a Ms4a4a -5.17 0.02 0.53 0
Pla2g2d Pla2g2d -5.15 0.00 0.12 0
Vav3 Vav3 -5.14 0.01 0.17 0
Gpx3 Gpx3 -5.11 0.01 0.30 0
Clec4n Clec4n -5.06 0.02 0.51 0
Folr2 Folr2 -5.06 0.03 0.51 0
Ednrb Ednrb -5.05 0.01 0.18 0
Spic Spic -5.01 0.01 0.26 0
Cd93 Cd93 -4.91 0.01 0.19 0
Ifi203 Ifi203 -4.88 0.01 0.20 0
Cd55 Cd55 -4.84 0.00 0.10 0
Pf4 Pf4 -4.83 0.04 0.76 0
Siglec1 Siglec1 -4.82 0.02 0.27 0
mg_mac_labels <- c("P2ry12", "Tmem119", "Hexb", "Siglech", "Sall1",
                     "Mrc1", "Lyve1", "Cd163", "Pf4", "Folr2",
                     "Trem2", "Apoe", "Axl", "Cd9", "Spp1",
                     "C1qa", "C1qb", "Tgfb1", "Il10",
                     "H2-Eb1", "Cd74", "Ccr2", "Ly6c2",
                     "Adgre1", "Cd68", "Csf1r")

de_mac_vs_mg_plot <- de_mac_vs_mg %>%
  mutate(
    sig = case_when(
      p_val_adj < 0.05 & avg_log2FC > 0.5  ~ "Microglia-like macrophages",
      p_val_adj < 0.05 & avg_log2FC < -0.5 ~ "Macrophage",
      TRUE ~ "NS"
    ),
    label = ifelse(gene %in% mg_mac_labels |
                     (p_val_adj < 1e-10 & abs(avg_log2FC) > 1.5),
                   gene, "")
  )

ggplot(de_mac_vs_mg_plot, aes(x = avg_log2FC, y = -log10(p_val_adj),
                               colour = sig, label = label)) +
  geom_point(size = 0.8, alpha = 0.5) +
  geom_text_repel(size = 3, max.overlaps = 30,
                   fontface = "italic", segment.size = 0.2) +
  scale_colour_manual(values = c("Microglia-like macrophages" = "#984EA3",
                                  "Macrophage" = "#FF7F00", "NS" = "grey80")) +
  geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", colour = "grey60") +
  theme_minimal(base_size = 12) +
  labs(title = "CNS: Microglia-like vs Macrophages",
       x = "log2FC (+ = Microglia-like)", y = "-log10(padj)")


6. CNS vs BM Differential Expression + GO

6a — Stroma: CNS vs BM

Idents(stroma) <- "Tissue"

de_stroma <- FindMarkers(stroma,
                          ident.1 = "CNS", ident.2 = "BM",
                          test.use = "wilcox",
                          min.pct = 0.1, logfc.threshold = 0)
de_stroma$gene <- rownames(de_stroma)

cat("Stroma — significant DE genes (padj < 0.05):", sum(de_stroma$p_val_adj < 0.05), "\n")
## Stroma — significant DE genes (padj < 0.05): 5656
cat("  CNS-enriched (log2FC > 0.5):",
    sum(de_stroma$avg_log2FC > 0.5 & de_stroma$p_val_adj < 0.05), "\n")
##   CNS-enriched (log2FC > 0.5): 2152
cat("  BM-enriched (log2FC < -0.5):",
    sum(de_stroma$avg_log2FC < -0.5 & de_stroma$p_val_adj < 0.05), "\n")
##   BM-enriched (log2FC < -0.5): 3174
cat("=== Top 30 CNS-enriched stroma genes ===\n")
## === Top 30 CNS-enriched stroma genes ===
de_stroma %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  arrange(desc(avg_log2FC)) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))
gene avg_log2FC pct.1 pct.2 p_val_adj
Penk Penk 12.89 0.56 0 0
Prok2 Prok2 12.61 0.69 0 0
Slc47a1 Slc47a1 11.57 0.78 0 0
Mmp3 Mmp3 11.45 0.19 0 0
Slc4a10 Slc4a10 11.29 0.86 0 0
Mfap4 Mfap4 10.89 0.47 0 0
Ptgdr Ptgdr 10.87 0.72 0 0
Ccn3 Ccn3 10.70 0.75 0 0
Cpxm2 Cpxm2 10.59 0.62 0 0
Reg3g Reg3g 10.57 0.24 0 0
Prmt8 Prmt8 10.54 0.62 0 0
Dcc Dcc 10.47 0.53 0 0
Cpz Cpz 10.45 0.65 0 0
Zic1 Zic1 10.41 0.70 0 0
Cldn11 Cldn11 10.37 0.59 0 0
Fmod Fmod 10.07 0.36 0 0
Shisa3 Shisa3 10.02 0.79 0 0
Il33 Il33 10.01 0.74 0 0
Ildr2 Ildr2 9.92 0.84 0 0
Igfbp6 Igfbp6 9.92 0.87 0 0
Mpzl2 Mpzl2 9.80 0.54 0 0
Wfdc1 Wfdc1 9.66 0.58 0 0
Tspan11 Tspan11 9.65 0.85 0 0
Foxd1 Foxd1 9.59 0.78 0 0
Gjb6 Gjb6 9.46 0.41 0 0
Angptl7 Angptl7 9.31 0.22 0 0
Ctxn3 Ctxn3 9.30 0.46 0 0
Hhip Hhip 9.26 0.36 0 0
Zic4 Zic4 9.26 0.47 0 0
Npy1r Npy1r 9.24 0.40 0 0
cat("\n=== Top 30 BM-enriched stroma genes ===\n")
## 
## === Top 30 BM-enriched stroma genes ===
de_stroma %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  arrange(avg_log2FC) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))
gene avg_log2FC pct.1 pct.2 p_val_adj
Ibsp Ibsp -10.41 0.00 0.19 0
Mpo Mpo -9.87 0.00 0.20 0
Il1rn Il1rn -9.84 0.00 0.19 0
Adgrl4 Adgrl4 -9.48 0.00 0.46 0
Kng1 Kng1 -8.48 0.00 0.31 0
Cpa6 Cpa6 -8.36 0.00 0.14 0
Podnl1 Podnl1 -8.32 0.00 0.18 0
Gm32554 Gm32554 -8.11 0.00 0.12 0
Ctsg Ctsg -8.09 0.00 0.13 0
Hoxa10 Hoxa10 -7.87 0.00 0.22 0
Hoxa3 Hoxa3 -7.86 0.00 0.26 0
Shox2 Shox2 -7.79 0.00 0.14 0
Gna15 Gna15 -7.70 0.00 0.23 0
Mme Mme -7.68 0.00 0.30 0
Meis1 Meis1 -7.57 0.01 0.56 0
Fst Fst -7.54 0.00 0.14 0
9030619P08Rik 9030619P08Rik -7.51 0.00 0.22 0
Trarg1 Trarg1 -7.45 0.00 0.10 0
Padi4 Padi4 -7.43 0.00 0.20 0
Hoxa7 Hoxa7 -7.42 0.00 0.38 0
Gdpd2 Gdpd2 -7.40 0.00 0.24 0
Siglech Siglech -7.31 0.00 0.11 0
Gm56622 Gm56622 -7.29 0.00 0.25 0
Sh2d5 Sh2d5 -7.27 0.00 0.24 0
Gm47754 Gm47754 -7.23 0.00 0.18 0
Hmga2 Hmga2 -7.20 0.00 0.21 0
C6 C6 -7.19 0.00 0.14 0
Dntt Dntt -7.13 0.01 0.25 0
Cd27 Cd27 -7.11 0.01 0.42 0
Pparg Pparg -7.08 0.01 0.20 0
niche_genes <- c("Cxcl12", "Cxcl16", "Ptn", "Mdk", "Kitl", "Fn1",
                  "Col1a1", "Col3a1", "Igf1", "Tgfb1", "Tgfb2",
                  "Il33", "Wnt5a", "App", "Apoe", "Lama1",
                  "Pdgfra", "Pdgfrb", "Pecam1", "Rgs5",
                  "Mmp2", "Mmp14", "Postn", "Tnc",
                  "H2-Eb1", "Cd74", "Ptgds")

de_stroma_plot <- de_stroma %>%
  mutate(
    sig = case_when(
      p_val_adj < 0.05 & avg_log2FC > 0.5  ~ "CNS-enriched",
      p_val_adj < 0.05 & avg_log2FC < -0.5 ~ "BM-enriched",
      TRUE ~ "NS"
    ),
    label = ifelse(gene %in% niche_genes |
                     (p_val_adj < 1e-20 & abs(avg_log2FC) > 2),
                   gene, "")
  )

ggplot(de_stroma_plot, aes(x = avg_log2FC, y = -log10(p_val_adj),
                            colour = sig, label = label)) +
  geom_point(size = 0.8, alpha = 0.5) +
  geom_text_repel(size = 3, max.overlaps = 30,
                   fontface = "italic", segment.size = 0.2) +
  scale_colour_manual(values = c("CNS-enriched" = "#B2182B",
                                  "BM-enriched" = "#2166AC", "NS" = "grey80")) +
  geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", colour = "grey60") +
  theme_minimal(base_size = 12) +
  labs(title = "Stroma: CNS vs BM", x = "log2FC (+ = CNS)", y = "-log10(padj)")

6b — Stroma GO Enrichment

# CNS-enriched
stroma_cns_genes <- de_stroma %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  pull(gene)

# BM-enriched
stroma_bm_genes <- de_stroma %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  pull(gene)

if (length(stroma_cns_genes) >= 5) {
  go_stroma_cns <- gost(stroma_cns_genes,
                         organism = "mmusculus",
                         sources = "GO:BP",
                         evcodes = TRUE,
                         significant = TRUE)
  
  if (!is.null(go_stroma_cns$result) && nrow(go_stroma_cns$result) > 0) {
    
    # Also show top terms as table
    go_stroma_cns$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — Stroma CNS-Enriched")
  } else {
    cat("No significant GO terms for CNS-enriched stroma genes\n")
  }
} else {
  cat("Too few CNS-enriched stroma genes for GO analysis\n")
}
Top 20 GO:BP — Stroma CNS-Enriched
term_name p_value term_size intersection_size
system development 0 4155 641
multicellular organism development 0 4907 707
localization 0 5309 741
anatomical structure development 0 6280 818
developmental process 0 6873 868
transport 0 4364 625
establishment of localization 0 4666 653
positive regulation of biological process 0 6447 804
anatomical structure morphogenesis 0 2781 448
positive regulation of cellular process 0 6095 762
cellular localization 0 3493 506
animal organ development 0 3272 480
regulation of cell communication 0 3560 502
regulation of cellular component organization 0 2537 397
regulation of signaling 0 3558 501
regulation of signal transduction 0 2940 438
cell surface receptor signaling pathway 0 2731 413
nervous system development 0 2593 398
cell migration 0 1539 274
macromolecule localization 0 3054 436
if (length(stroma_bm_genes) >= 5) {
  go_stroma_bm <- gost(stroma_bm_genes,
                         organism = "mmusculus",
                         sources = "GO:BP",
                         evcodes = TRUE,
                         significant = TRUE)
  
  if (!is.null(go_stroma_bm$result) && nrow(go_stroma_bm$result) > 0) {
    
    go_stroma_bm$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — Stroma BM-Enriched")
  } else {
    cat("No significant GO terms for BM-enriched stroma genes\n")
  }
} else {
  cat("Too few BM-enriched stroma genes for GO analysis\n")
}
Top 20 GO:BP — Stroma BM-Enriched
term_name p_value term_size intersection_size
positive regulation of biological process 0 6447 1303
positive regulation of cellular process 0 6095 1247
regulation of primary metabolic process 0 5176 1111
regulation of metabolic process 0 6679 1316
regulation of macromolecule metabolic process 0 6153 1240
positive regulation of metabolic process 0 3493 836
positive regulation of macromolecule metabolic process 0 3174 782
regulation of nucleobase-containing compound metabolic process 0 3842 882
regulation of biosynthetic process 0 5622 1137
regulation of macromolecule biosynthetic process 0 5426 1102
regulation of gene expression 0 5318 1084
negative regulation of cellular process 0 5540 1112
negative regulation of biological process 0 5759 1140
regulation of biological process 0 13108 2029
regulation of cellular process 0 12715 1983
biological regulation 0 13503 2067
positive regulation of biosynthetic process 0 2697 671
positive regulation of macromolecule biosynthetic process 0 2571 650
regulation of RNA metabolic process 0 3545 796
chromatin organization 0 842 320

6c — Macrophages: CNS vs BM

mac_only <- subset(myeloid, original_annotation == "Macrophages")
Idents(mac_only) <- "Tissue"

de_mac <- FindMarkers(mac_only,
                       ident.1 = "CNS", ident.2 = "BM",
                       test.use = "wilcox",
                       min.pct = 0.1, logfc.threshold = 0)
de_mac$gene <- rownames(de_mac)

cat("Macrophages — significant DE genes:", sum(de_mac$p_val_adj < 0.05), "\n")
## Macrophages — significant DE genes: 1300
cat("  CNS-enriched:", sum(de_mac$avg_log2FC > 0.5 & de_mac$p_val_adj < 0.05), "\n")
##   CNS-enriched: 738
cat("  BM-enriched:", sum(de_mac$avg_log2FC < -0.5 & de_mac$p_val_adj < 0.05), "\n")
##   BM-enriched: 419
cat("=== Top 30 CNS-enriched macrophage genes ===\n")
## === Top 30 CNS-enriched macrophage genes ===
de_mac %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  arrange(desc(avg_log2FC)) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))
gene avg_log2FC pct.1 pct.2 p_val_adj
Ptgds Ptgds 10.29 0.10 0.00 0
Fcrls Fcrls 7.02 0.29 0.01 0
Mgl2 Mgl2 6.35 0.14 0.00 0
Smim1 Smim1 4.88 0.10 0.00 0
Cd209g Cd209g 4.65 0.16 0.01 0
Igfbp4 Igfbp4 4.64 0.38 0.04 0
Grap Grap 4.07 0.23 0.02 0
Rgs18 Rgs18 4.06 0.13 0.01 0
Tln2 Tln2 4.06 0.11 0.01 0
Afap1l1 Afap1l1 4.04 0.11 0.01 0
F13a1 F13a1 3.92 0.74 0.08 0
Mbp Mbp 3.77 0.12 0.02 0
Cd209f Cd209f 3.64 0.23 0.04 0
Mctp1 Mctp1 3.55 0.18 0.06 0
Ighm Ighm 3.50 0.18 0.03 0
Lpar6 Lpar6 3.48 0.10 0.01 0
Marcksl1 Marcksl1 3.40 0.31 0.06 0
Ccnd1 Ccnd1 3.30 0.40 0.07 0
Gpr183 Gpr183 3.25 0.15 0.03 0
Inka1 Inka1 3.22 0.12 0.02 0
Ctla2b Ctla2b 3.22 0.33 0.06 0
Akr1b8 Akr1b8 3.19 0.30 0.06 0
Cbr2 Cbr2 3.16 0.77 0.17 0
Rbpj Rbpj 3.05 0.44 0.11 0
Cx3cr1 Cx3cr1 3.04 0.14 0.04 0
Ednrb Ednrb 3.03 0.18 0.03 0
Etv1 Etv1 2.97 0.16 0.03 0
Lifr Lifr 2.86 0.13 0.02 0
Dab2 Dab2 2.73 0.82 0.30 0
Sema4a Sema4a 2.72 0.10 0.02 0
cat("\n=== Top 30 BM-enriched macrophage genes ===\n")
## 
## === Top 30 BM-enriched macrophage genes ===
de_mac %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  arrange(avg_log2FC) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))
gene avg_log2FC pct.1 pct.2 p_val_adj
Paqr9 Paqr9 -4.34 0.01 0.16 0
Tmem26 Tmem26 -4.17 0.01 0.16 0
Ear2 Ear2 -4.08 0.04 0.50 0
Itgad Itgad -4.00 0.03 0.31 0
Gpd1 Gpd1 -3.94 0.02 0.21 0
Tspan10 Tspan10 -3.84 0.01 0.13 0
Cd5l Cd5l -3.81 0.09 0.70 0
Ubd Ubd -3.79 0.01 0.13 0
Treml4 Treml4 -3.69 0.01 0.11 0
Kcna2 Kcna2 -3.46 0.01 0.11 0
Pilrb1 Pilrb1 -3.45 0.02 0.21 0
Kcnj10 Kcnj10 -3.33 0.01 0.13 0
Ccr3 Ccr3 -3.31 0.03 0.31 0
Fgr Fgr -3.29 0.04 0.31 0
Mrap Mrap -3.28 0.04 0.30 0
Gfra2 Gfra2 -3.23 0.01 0.10 0
Clec1b Clec1b -2.89 0.03 0.22 0
Myo10 Myo10 -2.87 0.08 0.40 0
C3 C3 -2.81 0.03 0.15 0
Inf2 Inf2 -2.81 0.02 0.12 0
Crip2 Crip2 -2.80 0.05 0.28 0
Stab2 Stab2 -2.80 0.02 0.14 0
Vcam1 Vcam1 -2.79 0.33 0.88 0
Sdc3 Sdc3 -2.78 0.23 0.72 0
Pilra Pilra -2.74 0.07 0.39 0
Slc16a9 Slc16a9 -2.68 0.02 0.13 0
Itgax Itgax -2.66 0.01 0.10 0
Card11 Card11 -2.65 0.02 0.16 0
Sema6d Sema6d -2.65 0.03 0.14 0
Ccdc148 Ccdc148 -2.63 0.02 0.15 0
mac_label_genes <- c("Trem2", "Apoe", "Axl", "Cd9", "Spp1", "Gpnmb",
                      "Mrc1", "Lyve1", "Cd163", "Arg1",
                      "H2-Eb1", "H2-Ab1", "Cd74", "Ciita",
                      "Tnf", "Il1b", "Nos2", "Ccl2",
                      "C1qa", "C1qb", "Tgfb1", "Igf1",
                      "Lyz2", "Ly6c2", "Ccr2", "S100a4")

de_mac_plot <- de_mac %>%
  mutate(
    sig = case_when(
      p_val_adj < 0.05 & avg_log2FC > 0.5  ~ "CNS-enriched",
      p_val_adj < 0.05 & avg_log2FC < -0.5 ~ "BM-enriched",
      TRUE ~ "NS"
    ),
    label = ifelse(gene %in% mac_label_genes |
                     (p_val_adj < 1e-20 & abs(avg_log2FC) > 2),
                   gene, "")
  )

ggplot(de_mac_plot, aes(x = avg_log2FC, y = -log10(p_val_adj),
                         colour = sig, label = label)) +
  geom_point(size = 0.8, alpha = 0.5) +
  geom_text_repel(size = 3, max.overlaps = 30,
                   fontface = "italic", segment.size = 0.2) +
  scale_colour_manual(values = c("CNS-enriched" = "#B2182B",
                                  "BM-enriched" = "#2166AC", "NS" = "grey80")) +
  geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", colour = "grey60") +
  theme_minimal(base_size = 12) +
  labs(title = "Macrophages: CNS vs BM", x = "log2FC (+ = CNS)", y = "-log10(padj)")

6d — Macrophage GO Enrichment

mac_cns_genes <- de_mac %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  pull(gene)

mac_bm_genes <- de_mac %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  pull(gene)

if (length(mac_cns_genes) >= 5) {
  go_mac_cns <- gost(mac_cns_genes,
                      organism = "mmusculus",
                      sources = "GO:BP",
                      evcodes = TRUE,
                      significant = TRUE)
  
  if (!is.null(go_mac_cns$result) && nrow(go_mac_cns$result) > 0) {
    
    go_mac_cns$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — Macrophage CNS-Enriched")
  } else {
    cat("No significant GO terms for CNS-enriched macrophage genes\n")
  }
} else {
  cat("Too few CNS-enriched macrophage genes for GO analysis\n")
}
Top 20 GO:BP — Macrophage CNS-Enriched
term_name p_value term_size intersection_size
vesicle-mediated transport 0 1524 125
response to stress 0 3917 209
transport 0 4364 218
establishment of localization 0 4666 226
endocytosis 0 686 68
cellular localization 0 3493 184
positive regulation of biological process 0 6447 281
establishment of localization in cell 0 2036 128
negative regulation of biological process 0 5759 258
biological regulation 0 13503 478
positive regulation of cellular process 0 6095 267
regulation of biological process 0 13108 466
localization 0 5309 241
regulation of cellular process 0 12715 452
negative regulation of cellular process 0 5540 245
cellular process 0 21471 651
intracellular transport 0 1337 92
endosomal transport 0 269 37
immune system process 0 2848 149
import into cell 0 900 70
if (length(mac_bm_genes) >= 5) {
  go_mac_bm <- gost(mac_bm_genes,
                      organism = "mmusculus",
                      sources = "GO:BP",
                      evcodes = TRUE,
                      significant = TRUE)
  
  if (!is.null(go_mac_bm$result) && nrow(go_mac_bm$result) > 0) {
    
    go_mac_bm$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — Macrophage BM-Enriched")
  } else {
    cat("No significant GO terms for BM-enriched macrophage genes\n")
  }
} else {
  cat("Too few BM-enriched macrophage genes for GO analysis\n")
}
Top 20 GO:BP — Macrophage BM-Enriched
term_name p_value term_size intersection_size
localization 0 5309 196
transport 0 4364 171
establishment of localization 0 4666 174
import into cell 0 900 69
immune system process 0 2848 119
endocytosis 0 686 53
developmental process 0 6873 194
anatomical structure development 0 6280 183
response to stress 0 3917 136
cell migration 0 1539 78
cell motility 0 1804 83
transmembrane transport 0 1525 75
regulation of immune system process 0 1586 76
response to stimulus 0 10427 246
phagocytosis 0 221 29
regulation of cell migration 0 978 57
homeostatic process 0 1824 80
monoatomic ion transport 0 1235 64
positive regulation of biological process 0 6447 176
regulation of cell motility 0 1034 58

6e — CNS Microglia-like vs Macrophage GO

mg_enriched <- de_mac_vs_mg %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  pull(gene)

mac_enriched <- de_mac_vs_mg %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  pull(gene)

if (length(mg_enriched) >= 5) {
  go_mg <- gost(mg_enriched,
                 organism = "mmusculus",
                 sources = "GO:BP",
                 evcodes = TRUE,
                 significant = TRUE)
  
  if (!is.null(go_mg$result) && nrow(go_mg$result) > 0) {
    
    go_mg$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — Microglia-like Enriched")
  }
}
Top 20 GO:BP — Microglia-like Enriched
term_name p_value term_size intersection_size
positive regulation of biological process 0 6447 1282
positive regulation of cellular process 0 6095 1231
regulation of primary metabolic process 0 5176 1098
regulation of metabolic process 0 6679 1287
positive regulation of metabolic process 0 3493 815
protein metabolic process 0 4467 953
macromolecule modification 0 2695 676
protein modification process 0 2540 650
regulation of macromolecule metabolic process 0 6153 1187
positive regulation of macromolecule metabolic process 0 3174 750
biological regulation 0 13503 2073
regulation of biological process 0 13108 2021
regulation of cellular process 0 12715 1974
regulation of nucleobase-containing compound metabolic process 0 3842 823
regulation of biosynthetic process 0 5622 1072
regulation of macromolecule biosynthetic process 0 5426 1034
intracellular signal transduction 0 2810 648
localization 0 5309 1015
regulation of gene expression 0 5318 1016
regulation of RNA metabolic process 0 3545 757
if (length(mac_enriched) >= 5) {
  go_mac_enr <- gost(mac_enriched,
                       organism = "mmusculus",
                       sources = "GO:BP",
                       evcodes = TRUE,
                       significant = TRUE)
  
  if (!is.null(go_mac_enr$result) && nrow(go_mac_enr$result) > 0) {
    
    go_mac_enr$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — CNS Macrophage Enriched")
  }
}
Top 20 GO:BP — CNS Macrophage Enriched
term_name p_value term_size intersection_size
translation 0 716 159
cytoplasmic translation 0 167 80
generation of precursor metabolites and energy 0 468 123
aerobic respiration 0 200 78
cellular respiration 0 245 85
small molecule metabolic process 0 1762 238
energy derivation by oxidation of organic compounds 0 347 98
oxidative phosphorylation 0 145 64
organelle organization 0 3494 355
catabolic process 0 2530 288
protein metabolic process 0 4467 417
cellular process 0 21471 1203
nucleoside phosphate metabolic process 0 639 125
nucleobase-containing small molecule metabolic process 0 666 127
translation at synapse 0 52 38
translation at postsynapse 0 52 38
purine-containing compound metabolic process 0 571 113
purine nucleotide metabolic process 0 428 97
nucleotide metabolic process 0 484 103
translation at presynapse 0 51 37

7. CellChat Validation on Niche Cells

7a — TREM2/APP Axis

trem2_genes <- c("Trem2", "Tyrobp", "App", "Apoe", "Cd74", "Sort1")
trem2_present <- trem2_genes[trem2_genes %in% rownames(myeloid)]

# On myeloid subclusters
VlnPlot(myeloid, features = trem2_present,
        group.by = "seurat_clusters", split.by = "Tissue",
        pt.size = 0, ncol = 3,
        cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
  theme(axis.text.x = element_text(size = 9))

7b — Retention Chemokines (on Stroma)

retention_genes <- c("Cxcl12", "Cxcl16", "Vcam1", "Icam1",
                      "Ccl2", "Ccl7", "Cxcl14", "Kitl")
retention_present <- retention_genes[retention_genes %in% rownames(stroma)]

VlnPlot(stroma, features = retention_present,
        group.by = "seurat_clusters", split.by = "Tissue",
        pt.size = 0, ncol = 4,
        cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
  theme(axis.text.x = element_text(size = 9))

7c — Immunosuppressive Molecules

immunosupp <- c("Lgals9", "Cd274", "Pdcd1lg2", "Tgfb1",
                 "Il10", "Arg1", "Nt5e", "Ptgs2")
immunosupp_present <- immunosupp[immunosupp %in% rownames(myeloid)]

VlnPlot(myeloid, features = immunosupp_present,
        group.by = "seurat_clusters", split.by = "Tissue",
        pt.size = 0, ncol = 4,
        cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
  theme(axis.text.x = element_text(size = 9))

7d — IL-17 Receptors on Niche Cells

il17r_genes <- c("Il17ra", "Il17rb", "Il17rc", "Il17rd", "Il17re")

# Check on stroma
il17r_stroma <- il17r_genes[il17r_genes %in% rownames(stroma)]
if (length(il17r_stroma) > 0) {
  print(VlnPlot(stroma, features = il17r_stroma,
                group.by = "seurat_clusters", split.by = "Tissue",
                pt.size = 0, ncol = 3,
                cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
          theme(axis.text.x = element_text(size = 9)) &
          ggtitle("IL-17R — Stroma"))
}

# Check on myeloid
il17r_myeloid <- il17r_genes[il17r_genes %in% rownames(myeloid)]
if (length(il17r_myeloid) > 0) {
  print(VlnPlot(myeloid, features = il17r_myeloid,
                group.by = "seurat_clusters", split.by = "Tissue",
                pt.size = 0, ncol = 3,
                cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
          theme(axis.text.x = element_text(size = 9)) &
          ggtitle("IL-17R — Myeloid"))
}


8. Export

# DE results
write.csv(de_stroma, file.path(cache_dir, "13b_DE_stroma_CNS_vs_BM.csv"),
          row.names = FALSE)
write.csv(de_mac, file.path(cache_dir, "13b_DE_macrophages_CNS_vs_BM.csv"),
          row.names = FALSE)
write.csv(de_mac_vs_mg, file.path(cache_dir, "13b_DE_CNS_macrophages_vs_microglia.csv"),
          row.names = FALSE)

# Cluster markers
write.csv(stroma_markers, file.path(cache_dir, "13b_stroma_subcluster_markers.csv"),
          row.names = FALSE)
write.csv(myeloid_markers, file.path(cache_dir, "13b_myeloid_subcluster_markers.csv"),
          row.names = FALSE)

# GO results (if they exist)

# Helper to flatten gprofiler2 list columns for CSV export
write_gost <- function(gost_obj, path) {
  if (!is.null(gost_obj$result)) {
    df <- gost_obj$result %>%
      select(-any_of(c("parents", "intersection")))
    write.csv(df, path, row.names = FALSE)
  }
}

if (exists("go_stroma_cns")) write_gost(go_stroma_cns, file.path(cache_dir, "13b_GO_stroma_CNS_enriched.csv"))
if (exists("go_stroma_bm")) write_gost(go_stroma_bm, file.path(cache_dir, "13b_GO_stroma_BM_enriched.csv"))
if (exists("go_mac_cns")) write_gost(go_mac_cns, file.path(cache_dir, "13b_GO_macrophage_CNS_enriched.csv"))
if (exists("go_mac_bm")) write_gost(go_mac_bm, file.path(cache_dir, "13b_GO_macrophage_BM_enriched.csv"))
if (exists("go_mg")) write_gost(go_mg, file.path(cache_dir, "13b_GO_microglia_vs_mac.csv"))

# Save subclustered objects
qs_save(stroma, file.path(cache_dir, "13b_stroma_subclustered.qs"))
qs_save(myeloid, file.path(cache_dir, "13b_myeloid_subclustered.qs"))

cat("All results saved to:", cache_dir, "\n")
## All results saved to: /exports/eddie/scratch/aduguid3/harmony_clustering

Session Information

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Rocky Linux 9.5 (Blue Onyx)
## 
## Matrix products: default
## BLAS/LAPACK: /opt/intel/oneapi/mkl/2024.0/lib/libmkl_gf_lp64.so.2;  LAPACK version 3.10.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/London
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] UCell_2.10.1          knitr_1.51            circlize_0.4.17      
##  [4] gprofiler2_0.2.4      ComplexHeatmap_2.22.0 patchwork_1.3.2      
##  [7] ggrepel_0.9.6         ggplot2_4.0.2         tidyr_1.3.1          
## [10] dplyr_1.1.4           qs2_0.1.7             Seurat_5.4.0         
## [13] SeuratObject_5.3.0    sp_2.1-4             
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.23            splines_4.4.1              
##   [3] later_1.4.7                 bitops_1.0-9               
##   [5] tibble_3.3.1                polyclip_1.10-7            
##   [7] fastDummies_1.7.5           lifecycle_1.0.5            
##   [9] doParallel_1.0.17           globals_0.16.3             
##  [11] lattice_0.22-6              MASS_7.3-60.2              
##  [13] magrittr_2.0.3              limma_3.62.2               
##  [15] plotly_4.12.0               sass_0.4.9                 
##  [17] rmarkdown_2.30              jquerylib_0.1.4            
##  [19] yaml_2.3.12                 httpuv_1.6.16              
##  [21] otel_0.2.0                  sctransform_0.4.3          
##  [23] spam_2.11-3                 spatstat.sparse_3.1-0      
##  [25] reticulate_1.45.0           cowplot_1.2.0              
##  [27] pbapply_1.7-4               RColorBrewer_1.1-3         
##  [29] abind_1.4-5                 zlibbioc_1.52.0            
##  [31] Rtsne_0.17                  GenomicRanges_1.58.0       
##  [33] purrr_1.0.2                 presto_1.0.0               
##  [35] RCurl_1.98-1.17             BiocGenerics_0.52.0        
##  [37] GenomeInfoDbData_1.2.13     IRanges_2.40.1             
##  [39] S4Vectors_0.44.0            irlba_2.3.7                
##  [41] listenv_0.9.1               spatstat.utils_3.2-1       
##  [43] goftest_1.2-3               RSpectra_0.16-2            
##  [45] spatstat.random_3.4-4       fitdistrplus_1.2-6         
##  [47] parallelly_1.37.1           codetools_0.2-20           
##  [49] DelayedArray_0.32.0         tidyselect_1.2.1           
##  [51] shape_1.4.6.1               UCSC.utils_1.2.0           
##  [53] farver_2.1.2                matrixStats_1.5.0          
##  [55] stats4_4.4.1                spatstat.explore_3.7-0     
##  [57] jsonlite_2.0.0              GetoptLong_1.1.0           
##  [59] BiocNeighbors_2.0.1         progressr_0.18.0           
##  [61] ggridges_0.5.6              survival_3.6-4             
##  [63] iterators_1.0.14            foreach_1.5.2              
##  [65] tools_4.4.1                 ica_1.0-3                  
##  [67] Rcpp_1.0.12                 glue_1.8.0                 
##  [69] gridExtra_2.3               SparseArray_1.6.2          
##  [71] xfun_0.56                   MatrixGenerics_1.18.1      
##  [73] GenomeInfoDb_1.42.3         withr_3.0.2                
##  [75] fastmap_1.2.0               fansi_1.0.6                
##  [77] digest_0.6.35               R6_2.6.1                   
##  [79] mime_0.12                   colorspace_2.1-0           
##  [81] scattermore_1.2             tensor_1.5.1               
##  [83] dichromat_2.0-0.1           spatstat.data_3.1-9        
##  [85] utf8_1.2.4                  generics_0.1.3             
##  [87] data.table_1.15.4           httr_1.4.7                 
##  [89] htmlwidgets_1.6.4           S4Arrays_1.6.0             
##  [91] uwot_0.2.4                  pkgconfig_2.0.3            
##  [93] gtable_0.3.6                lmtest_0.9-40              
##  [95] S7_0.2.1                    SingleCellExperiment_1.28.1
##  [97] XVector_0.46.0              htmltools_0.5.8.1          
##  [99] dotCall64_1.2               clue_0.3-67                
## [101] scales_1.4.0                Biobase_2.66.0             
## [103] png_0.1-8                   spatstat.univar_3.1-6      
## [105] rstudioapi_0.16.0           reshape2_1.4.4             
## [107] rjson_0.2.23                curl_7.0.0                 
## [109] nlme_3.1-164                cachem_1.1.0               
## [111] zoo_1.8-15                  GlobalOptions_0.1.3        
## [113] stringr_1.5.1               KernSmooth_2.23-24         
## [115] vipor_0.4.7                 parallel_4.4.1             
## [117] miniUI_0.1.2                ggrastr_1.0.2              
## [119] pillar_1.9.0                vctrs_0.6.5                
## [121] RANN_2.6.2                  promises_1.5.0             
## [123] stringfish_0.18.0           xtable_1.8-8               
## [125] cluster_2.1.6               beeswarm_0.4.0             
## [127] evaluate_1.0.5              cli_3.6.5                  
## [129] compiler_4.4.1              rlang_1.1.7                
## [131] crayon_1.5.2                future.apply_1.11.2        
## [133] labeling_0.4.3              ggbeeswarm_0.7.3           
## [135] plyr_1.8.9                  stringi_1.8.4              
## [137] viridisLite_0.4.2           deldir_2.0-4               
## [139] BiocParallel_1.40.2         lazyeval_0.2.2             
## [141] spatstat.geom_3.7-0         Matrix_1.7-0               
## [143] RcppHNSW_0.6.0              future_1.33.2              
## [145] statmod_1.5.1               shiny_1.13.0               
## [147] SummarizedExperiment_1.36.0 ROCR_1.0-12                
## [149] igraph_2.2.2                RcppParallel_5.1.8         
## [151] bslib_0.7.0