Overview

Before running NicheNet, we need to properly characterise the niche populations identified in the CellChat analysis. This script answers:

What are these cells? (marker panels from CNS border / niche literature)
How do they differ between CNS and BM? (tissue-specific DE)
What functional programmes are active? (module scoring)

Populations: Stroma, Macrophages, Microglia-like macrophages

Setup

library(Seurat)
library(qs2)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)
library(patchwork)
library(ComplexHeatmap)
library(gprofiler2)
library(circlize)
library(knitr)
library(UCell)

base_path <- "/exports/eddie/scratch/aduguid3"
cache_dir <- file.path(base_path, "harmony_clustering")

obj <- qs_read(file.path(cache_dir, "12_combined_seurat.qs"))
DefaultAssay(obj) <- "originalexp"
obj <- NormalizeData(obj, verbose = FALSE)

cat("Total cells:", ncol(obj), "\n")

## Total cells: 109322

cat("\nCell types:\n")

## 
## Cell types:

print(table(obj$cell_annotation))

## 
##                      B-ALL                  Basophils 
##                      91623                        555 
##                  CD8_Naive    CD8_Teff (unclassified) 
##                       1768                         80 
##                    CD8_Tex                   CD8_Tpex 
##                        284                        143 
##        CD8+ NKT-like cells      Effector CD4+ T cells 
##                        288                        814 
##                Macrophages Microglia-like macrophages 
##                       2031                       1540 
##    Myeloid Dendritic cells              Naive B cells 
##                        117                         97 
##      Natural killer  cells                Neutrophils 
##                        979                       1145 
##                   NKT-like    Non-classical monocytes 
##                        388                        606 
##                   Other γδ             Plasma B cells 
##                        505                        844 
##                Pre-B cells           Progenitor cells 
##                       3098                        714 
##                     Stroma                      T_eff 
##                       1442                         66 
##                        Vγ4                     Vγ6Vδ4 
##                         72                        123

cat("\nBy tissue:\n")

## 
## By tissue:

print(table(obj$cell_annotation, obj$Tissue))

##                             
##                                 BM   CNS
##   B-ALL                      44606 47017
##   Basophils                    523    32
##   CD8_Naive                   1639   129
##   CD8_Teff (unclassified)       49    31
##   CD8_Tex                      183   101
##   CD8_Tpex                     113    30
##   CD8+ NKT-like cells          173   115
##   Effector CD4+ T cells        631   183
##   Macrophages                 1127   904
##   Microglia-like macrophages    64  1476
##   Myeloid Dendritic cells       76    41
##   Naive B cells                 94     3
##   Natural killer  cells        887    92
##   Neutrophils                 1103    42
##   NKT-like                     219   169
##   Non-classical monocytes      356   250
##   Other γδ                     253   252
##   Plasma B cells               754    90
##   Pre-B cells                 2893   205
##   Progenitor cells             709     5
##   Stroma                       719   723
##   T_eff                         32    34
##   Vγ4                           40    32
##   Vγ6Vδ4                         2   121

1. Global Niche UMAP

1a — Compute UMAP

niche_types <- c("Stroma", "Macrophages", "Microglia-like macrophages")
niche <- subset(obj, cell_annotation %in% niche_types)
cat("Niche cells:", ncol(niche), "\n")

## Niche cells: 5013

print(table(niche$cell_annotation, niche$Tissue))

##                             
##                                BM  CNS
##   Macrophages                1127  904
##   Microglia-like macrophages   64 1476
##   Stroma                      719  723

niche <- NormalizeData(niche, verbose = FALSE)
niche <- FindVariableFeatures(niche, nfeatures = 2000, verbose = FALSE)
niche <- ScaleData(niche, verbose = FALSE)
niche <- RunPCA(niche, npcs = 20, verbose = FALSE)
niche <- RunUMAP(niche, dims = 1:15, verbose = FALSE)

cat("Niche UMAP computed\n")

## Niche UMAP computed

1b — Overview Plots

p1 <- DimPlot(niche, group.by = "cell_annotation",
              cols = c("Stroma" = "#228B22", "Macrophages" = "#FF7F00",
                        "Microglia-like macrophages" = "#984EA3"),
              label = TRUE, label.size = 4) +
  ggtitle("Niche Populations")

p2 <- DimPlot(niche, group.by = "Tissue",
              cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
  ggtitle("By Tissue")

p3 <- DimPlot(niche, group.by = "cell_annotation",
              split.by = "Tissue",
              cols = c("Stroma" = "#228B22", "Macrophages" = "#FF7F00",
                        "Microglia-like macrophages" = "#984EA3")) +
  ggtitle("Split by Tissue") + NoLegend()

(p1 + p2) / p3

1c — Key Markers on Niche UMAP

niche_markers <- c("Col1a1", "Pdgfra", "Pdgfrb",        # stroma
                    "Adgre1", "Cd68", "Mrc1",             # macrophage
                    "P2ry12", "Tmem119", "Hexb",           # microglia
                    "Trem2", "Apoe", "Cxcl12")             # functional
niche_markers_present <- niche_markers[niche_markers %in% rownames(niche)]

FeaturePlot(niche, features = niche_markers_present,
            order = TRUE, ncol = 4,
            cols = c("lightgrey", "darkred")) &
  theme(plot.title = element_text(face = "bold.italic", size = 10))

2. Stroma Subclustering

2a — Subcluster Stroma

stroma <- subset(niche, cell_annotation == "Stroma")
cat("Stroma cells:", ncol(stroma), "\n")

## Stroma cells: 1442

print(table(stroma$Tissue))

## 
##  BM CNS 
## 719 723

stroma <- NormalizeData(stroma, verbose = FALSE)
stroma <- FindVariableFeatures(stroma, nfeatures = 2000, verbose = FALSE)
stroma <- ScaleData(stroma, verbose = FALSE)
stroma <- RunPCA(stroma, npcs = 15, verbose = FALSE)
stroma <- RunUMAP(stroma, dims = 1:10, verbose = FALSE)
stroma <- FindNeighbors(stroma, dims = 1:10, verbose = FALSE)

# Try a few resolutions
for (res in c(0.3, 0.5, 0.8)) {
  stroma <- FindClusters(stroma, resolution = res, verbose = FALSE)
  cat("Resolution", res, "->", length(unique(stroma@meta.data[[paste0("originalexp_snn_res.", res)]])), "clusters\n")
}

## Resolution 0.3 -> 6 clusters
## Resolution 0.5 -> 9 clusters
## Resolution 0.8 -> 11 clusters

# Default to res 0.5 — adjust if needed
stroma <- FindClusters(stroma, resolution = 0.5, verbose = FALSE)
Idents(stroma) <- "seurat_clusters"

p1 <- DimPlot(stroma, group.by = "seurat_clusters",
              label = TRUE, label.size = 5) +
  ggtitle("Stroma Subclusters")

p2 <- DimPlot(stroma, group.by = "Tissue",
              cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
  ggtitle("By Tissue")

p1 + p2

stroma_comp <- stroma@meta.data %>%
  count(seurat_clusters, Tissue) %>%
  group_by(seurat_clusters) %>%
  mutate(total = sum(n), pct = round(n / total * 100, 1)) %>%
  ungroup()

cat("Stroma cluster composition:\n")

## Stroma cluster composition:

print(stroma_comp %>% pivot_wider(id_cols = c(seurat_clusters, total),
                                    names_from = Tissue,
                                    values_from = n, values_fill = 0))

## # A tibble: 9 × 4
##   seurat_clusters total   CNS    BM
##   <fct>           <int> <int> <int>
## 1 0                 294   294     0
## 2 1                 284     7   277
## 3 2                 275   275     0
## 4 3                 161     2   159
## 5 4                 122   122     0
## 6 5                 122     4   118
## 7 6                 111     0   111
## 8 7                  55     1    54
## 9 8                  18    18     0

2b — Stroma Cluster Markers

stroma_markers <- FindAllMarkers(stroma, only.pos = TRUE,
                                  min.pct = 0.2, logfc.threshold = 0.5,
                                  verbose = FALSE)
cat("Total marker genes:", nrow(stroma_markers), "\n")

## Total marker genes: 19694

stroma_markers %>%
  group_by(cluster) %>%
  slice_max(avg_log2FC, n = 10) %>%
  select(cluster, gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  kable(digits = c(NA, NA, 2, 2, 2, 3), caption = "Top 10 markers per stroma subcluster")

Top 10 markers per stroma subcluster
cluster	gene	avg_log2FC	pct.1	pct.2
0	Ptgds	4.09	0.34	0.09
0	Penk	3.35	0.65	0.19
0	Crym	2.82	0.25	0.10
0	Asgr1	2.68	0.29	0.09
0	Igfbp2	2.61	0.45	0.15
0	Reg3g	2.54	0.28	0.08
0	Ptn	2.49	0.88	0.32
0	Crabp2	2.46	0.30	0.09
0	Apod	2.26	0.98	0.40
0	Atp6v0e2	2.25	0.29	0.14
1	Ibsp	11.99	0.48	0.00
1	Ebf3	10.23	0.67	0.00
1	Shox2	9.40	0.35	0.00
1	Cpa6	9.34	0.35	0.00
1	Dlx5	9.21	0.26	0.00
1	Gdpd2	9.04	0.60	0.00
1	Hoxc9	9.00	0.24	0.00
1	Prr16	8.94	0.47	0.01
1	Inhbb	8.93	0.34	0.00
1	Csmd1	8.91	0.88	0.06
2	Chil1	4.44	0.35	0.04
2	Gm31641	3.98	0.32	0.03
2	Epha3	3.83	0.79	0.10
2	Mmp3	3.78	0.33	0.04
2	Itga8	3.73	0.32	0.02
2	Cemip	3.64	0.75	0.10
2	Zfp385b	3.61	0.21	0.01
2	Nox4	3.54	0.58	0.04
2	Frmpd4	3.53	0.40	0.04
2	Adam33	3.41	0.46	0.03
3	Gm30948	10.29	0.22	0.00
3	Gm32196	9.17	0.32	0.00
3	Siglech	9.14	0.49	0.00
3	Rag2	9.02	0.36	0.00
3	Klrd1	8.67	0.58	0.00
3	Gm34680	8.53	0.52	0.00
3	Cox6a2	8.53	0.81	0.01
3	Gm56536	8.13	0.33	0.00
3	Blk	7.82	0.37	0.00
3	Col19a1	7.60	0.24	0.00
4	Chrm3	4.07	0.75	0.09
4	Nlgn1	3.99	0.39	0.05
4	Lrrc38	3.69	0.22	0.02
4	Nalcn	3.65	0.39	0.03
4	A330076H08Rik	3.60	0.26	0.04
4	Kcnb2	3.56	0.72	0.10
4	Rtl4	3.56	0.35	0.05
4	Kctd8	3.43	0.51	0.10
4	Susd4	3.40	0.20	0.02
4	Slc9b1	3.38	0.26	0.04
5	Gp9	6.64	0.57	0.04
5	Parvb	6.59	0.23	0.02
5	Gp1ba	6.33	0.37	0.01
5	Treml1	6.30	0.43	0.02
5	F2rl2	6.14	0.43	0.02
5	Gm15915	5.61	0.51	0.02
5	Gp5	5.56	0.51	0.02
5	Il1b	5.55	0.24	0.01
5	Pf4	5.42	0.57	0.04
5	Mmrn1	4.98	0.37	0.02
6	Gm56930	6.54	0.44	0.01
6	Cd200r4	5.70	0.21	0.00
6	Clec4e	5.47	0.58	0.03
6	Cxcr2	5.30	0.26	0.01
6	Lincred1	5.27	0.21	0.01
6	Ctsg	5.10	0.47	0.04
6	Mpo	5.05	0.51	0.06
6	Csf3r	4.99	0.61	0.04
6	Prtn3	4.65	0.98	0.19
6	Gm34703	4.63	0.39	0.03
7	Ankle1	8.39	0.54	0.00
7	Pif1	7.86	0.46	0.00
7	Ska1	7.27	0.60	0.01
7	Mxd3	7.25	0.42	0.00
7	Fam83d	7.23	0.38	0.00
7	Ttk	7.20	0.69	0.01
7	Pimreg	7.01	0.74	0.01
7	Kif2c	6.98	0.64	0.01
7	Kif18b	6.75	0.76	0.01
7	H3c3	6.72	0.60	0.01
8	2900040C04Rik	14.77	0.83	0.00
8	Ttr	13.81	0.89	0.02
8	Prr32	12.87	0.83	0.00
8	Ppp1r1b	12.87	0.94	0.00
8	Otx2	12.54	0.83	0.00
8	Kl	12.53	0.72	0.00
8	Foxj1	11.89	0.67	0.00
8	Vat1l	11.51	0.78	0.00
8	Pcp4	11.48	0.61	0.00
8	Cox8b	11.29	0.33	0.00

top_genes <- stroma_markers %>%
  group_by(cluster) %>%
  slice_max(avg_log2FC, n = 8) %>%
  pull(gene) %>% unique()

DoHeatmap(stroma, features = top_genes, size = 3,
          group.colors = scales::hue_pal()(length(unique(stroma$seurat_clusters)))) +
  theme(axis.text.y = element_text(face = "italic", size = 7)) +
  ggtitle("Stroma Subcluster Markers")

2c — Identity Marker Panels

stroma_id_genes <- list(
  "Meningeal_fibroblast" = c("Col1a1", "Col1a2", "Col3a1", "Dcn", "Lum",
                               "Pdgfra", "Fbln1", "Mgp"),
  "Pericyte" = c("Pdgfrb", "Rgs5", "Acta2", "Des", "Notch3",
                   "Kcnj8", "Abcc9", "Vtn"),
  "Endothelial" = c("Pecam1", "Cdh5", "Flt1", "Kdr", "Cldn5",
                      "Emcn", "Plvap"),
  "Leptomeningeal" = c("Slc47a1", "Aldh1a2", "Crabp2", "Ptgds", "Slc6a13"),
  "CAF_like" = c("Acta2", "Fap", "Postn", "Tnc", "Mmp2", "Mmp14"),
  "Niche_support" = c("Cxcl12", "Cxcl16", "Kitl", "Ptn", "Mdk",
                        "Igf1", "Wnt5a", "Il33"),
  "Immunomodulatory" = c("Cd274", "Pdcd1lg2", "Tgfb1", "Tgfb2",
                           "Nt5e", "Ptgs2")
)

all_stroma_id <- unique(unlist(stroma_id_genes))
present_stroma_id <- all_stroma_id[all_stroma_id %in% rownames(stroma)]
cat("Identity genes present:", length(present_stroma_id), "/", length(all_stroma_id), "\n")

## Identity genes present: 47 / 47

DotPlot(stroma, features = present_stroma_id,
        cols = c("lightgrey", "darkred"), dot.scale = 5) +
  RotatedAxis() +
  ggtitle("Stroma Identity Markers by Subcluster") +
  theme(axis.text.x = element_text(face = "italic", size = 7))

key_stroma <- c("Col1a1", "Pdgfra", "Pdgfrb", "Rgs5", "Pecam1", "Cdh5",
                 "Cxcl12", "Cxcl16", "Ptn", "Mdk", "Kitl", "Tgfb1")
key_stroma_present <- key_stroma[key_stroma %in% rownames(stroma)]

FeaturePlot(stroma, features = key_stroma_present,
            order = TRUE, ncol = 4,
            cols = c("lightgrey", "darkred")) &
  theme(plot.title = element_text(face = "bold.italic", size = 10))

2d — Stroma Functional Scoring

stroma_modules <- list(
  ECM_remodelling = c("Col1a1", "Col1a2", "Col3a1", "Fn1", "Tnc",
                       "Postn", "Mmp2", "Mmp14", "Timp1", "Lox"),
  Growth_support = c("Ptn", "Mdk", "Igf1", "Igf2", "Fgf2",
                      "Kitl", "Wnt5a", "Pdgfa"),
  Niche_retention = c("Cxcl12", "Cxcl16", "Vcam1", "Icam1",
                        "Fn1", "Lama1", "Lamb1"),
  Angiogenic = c("Vegfa", "Vegfb", "Angpt1", "Angpt2",
                   "Pdgfb", "Hif1a", "Flt1"),
  Immunomodulatory = c("Tgfb1", "Tgfb2", "Il33", "Cd274",
                         "Pdcd1lg2", "Nt5e", "Ptgs2"),
  CAF_programme = c("Acta2", "Fap", "Postn", "Tnc", "Fn1",
                      "Mmp2", "Mmp9", "Mmp14")
)

stroma_mods_filt <- lapply(stroma_modules, function(g) g[g %in% rownames(stroma)])
stroma_mods_filt <- stroma_mods_filt[sapply(stroma_mods_filt, length) >= 3]

stroma <- AddModuleScore_UCell(stroma, features = stroma_mods_filt, name = "_UCell")

stroma_score_cols <- grep("_UCell$", colnames(stroma@meta.data), value = TRUE)

VlnPlot(stroma, features = stroma_score_cols,
        group.by = "seurat_clusters", pt.size = 0,
        ncol = 3) &
  theme(axis.text.x = element_text(size = 10))

# Split by tissue for each module
VlnPlot(stroma, features = stroma_score_cols,
        group.by = "seurat_clusters", split.by = "Tissue",
        pt.size = 0, ncol = 3,
        cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
  theme(axis.text.x = element_text(size = 10))

3. Myeloid Subclustering

Pool macrophages and CNS microglia-like into a single myeloid object. BM microglia-like cells are excluded (CNS-restricted population, few if any BM cells).

3a — Pool and Subcluster

# Macrophages from both tissues + microglia-like from CNS only
mac_cells <- WhichCells(niche, expression = cell_annotation == "Macrophages")
mg_cns_cells <- WhichCells(niche, expression = cell_annotation == "Microglia-like macrophages" & Tissue == "CNS")

cat("BM microglia-like excluded:",
    sum(niche$cell_annotation == "Microglia-like macrophages" & niche$Tissue == "BM"), "cells\n")

## BM microglia-like excluded: 64 cells

myeloid <- subset(niche, cells = c(mac_cells, mg_cns_cells))

# Preserve original annotation for later comparison
myeloid$original_annotation <- myeloid$cell_annotation

cat("Myeloid pool:", ncol(myeloid), "cells\n")

## Myeloid pool: 3507 cells

print(table(myeloid$original_annotation, myeloid$Tissue))

##                             
##                                BM  CNS
##   Macrophages                1127  904
##   Microglia-like macrophages    0 1476

myeloid <- NormalizeData(myeloid, verbose = FALSE)
myeloid <- FindVariableFeatures(myeloid, nfeatures = 2000, verbose = FALSE)
myeloid <- ScaleData(myeloid, verbose = FALSE)
myeloid <- RunPCA(myeloid, npcs = 20, verbose = FALSE)
myeloid <- RunUMAP(myeloid, dims = 1:15, verbose = FALSE)
myeloid <- FindNeighbors(myeloid, dims = 1:15, verbose = FALSE)

# Multiple resolutions
for (res in c(0.3, 0.5, 0.8)) {
  myeloid <- FindClusters(myeloid, resolution = res, verbose = FALSE)
  cat("Resolution", res, "->",
      length(unique(myeloid@meta.data[[paste0("originalexp_snn_res.", res)]])), "clusters\n")
}

## Resolution 0.3 -> 9 clusters
## Resolution 0.5 -> 12 clusters
## Resolution 0.8 -> 13 clusters

myeloid <- FindClusters(myeloid, resolution = 0.5, verbose = FALSE)
Idents(myeloid) <- "seurat_clusters"

p1 <- DimPlot(myeloid, group.by = "seurat_clusters",
              label = TRUE, label.size = 5) +
  ggtitle("Myeloid Subclusters")

p2 <- DimPlot(myeloid, group.by = "Tissue",
              cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) +
  ggtitle("By Tissue")

p3 <- DimPlot(myeloid, group.by = "original_annotation",
              cols = c("Macrophages" = "#FF7F00", "Microglia-like macrophages" = "#984EA3")) +
  ggtitle("Original Annotation")

p4 <- DimPlot(myeloid, group.by = "original_annotation",
              split.by = "Tissue",
              cols = c("Macrophages" = "#FF7F00", "Microglia-like macrophages" = "#984EA3")) +
  ggtitle("Annotation x Tissue") + NoLegend()

(p1 + p2) / (p3 + p4)

myeloid_comp <- myeloid@meta.data %>%
  count(seurat_clusters, original_annotation, Tissue) %>%
  group_by(seurat_clusters) %>%
  mutate(total = sum(n), pct = round(n / total * 100, 1)) %>%
  ungroup()

cat("Myeloid cluster composition:\n")

## Myeloid cluster composition:

print(myeloid_comp %>% pivot_wider(id_cols = c(seurat_clusters, total),
                                     names_from = c(original_annotation, Tissue),
                                     values_from = n, values_fill = 0))

## # A tibble: 12 × 5
##    seurat_clusters total Macrophages_CNS Microglia-like macroph…¹ Macrophages_BM
##    <fct>           <int>           <int>                    <int>          <int>
##  1 0                1169               1                     1168              0
##  2 1                 582              76                        7            499
##  3 2                 565             496                       48             21
##  4 3                 433              27                        0            406
##  5 4                 202              64                       18            120
##  6 5                 128             115                        8              5
##  7 6                 115               4                      111              0
##  8 7                 108             102                        6              0
##  9 8                  73               3                       70              0
## 10 9                  63               3                        0             60
## 11 10                 43               4                       39              0
## 12 11                 26               9                        1             16
## # ℹ abbreviated name: ¹`Microglia-like macrophages_CNS`

3b — Myeloid Cluster Markers

myeloid_markers <- FindAllMarkers(myeloid, only.pos = TRUE,
                                    min.pct = 0.2, logfc.threshold = 0.5,
                                    verbose = FALSE)
cat("Total marker genes:", nrow(myeloid_markers), "\n")

## Total marker genes: 12288

myeloid_markers %>%
  group_by(cluster) %>%
  slice_max(avg_log2FC, n = 10) %>%
  select(cluster, gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  kable(digits = c(NA, NA, 2, 2, 2, 3), caption = "Top 10 markers per myeloid subcluster")

Top 10 markers per myeloid subcluster
cluster	gene	avg_log2FC	pct.1	pct.2	p_val_adj
0	Upk1b	4.38	0.22	0.01	0.000
0	Slc2a5	4.27	0.56	0.04	0.000
0	Thrsp	4.26	0.21	0.01	0.000
0	Tmem100	4.13	0.31	0.02	0.000
0	Ecscr	4.05	0.58	0.04	0.000
0	Jam2	3.96	0.28	0.02	0.000
0	Tmem119	3.95	0.95	0.09	0.000
0	P2ry12	3.95	1.00	0.22	0.000
0	Gal3st4	3.86	0.48	0.04	0.000
0	Adora3	3.84	0.47	0.04	0.000
1	H2-Aa	3.67	0.40	0.06	0.000
1	H2-Eb1	3.66	0.32	0.04	0.000
1	H2-Ab1	3.47	0.42	0.10	0.000
1	Cd74	3.32	0.53	0.23	0.000
1	Gpnmb	2.96	0.44	0.10	0.000
1	Atp6v0d2	2.70	0.60	0.14	0.000
1	Pla2g2d	2.65	0.26	0.07	0.000
1	Anpep	2.64	0.38	0.08	0.000
1	Acp5	2.47	0.66	0.13	0.000
1	Cd4	2.38	0.46	0.12	0.000
2	Akr1b8	4.67	0.44	0.04	0.000
2	Srxn1	3.96	0.33	0.05	0.000
2	Clec4d	3.79	0.50	0.06	0.000
2	Mt2	3.53	0.68	0.12	0.000
2	Maoa	3.49	0.22	0.03	0.000
2	Prdx1	3.17	1.00	0.75	0.000
2	Cbr2	3.04	0.83	0.16	0.000
2	Cd209f	2.99	0.25	0.04	0.000
2	Chchd10	2.85	0.31	0.04	0.000
2	Ccl8	2.78	0.68	0.15	0.000
3	Itgad	4.32	0.60	0.04	0.000
3	Ubd	4.20	0.25	0.01	0.000
3	Ccr3	4.14	0.58	0.04	0.000
3	Kcna2	3.98	0.20	0.01	0.000
3	Paqr9	3.90	0.27	0.02	0.000
3	Ccdc148	3.90	0.28	0.02	0.000
3	Tmem26	3.47	0.24	0.03	0.000
3	Cd5l	3.43	0.98	0.15	0.000
3	Pira2	3.32	0.28	0.03	0.000
3	Mrap	3.30	0.46	0.06	0.000
4	Aurka	7.42	0.33	0.00	0.000
4	Nusap1	7.35	0.46	0.00	0.000
4	Prc1	7.09	0.54	0.00	0.000
4	Pimreg	6.74	0.20	0.00	0.000
4	H2bc14	6.67	0.24	0.00	0.000
4	Plk1	6.61	0.40	0.00	0.000
4	H1f5	6.56	0.67	0.04	0.000
4	Birc5	6.53	0.68	0.01	0.000
4	Ube2c	6.50	0.63	0.02	0.000
4	Ckap2l	6.49	0.45	0.01	0.000
5	Pde7b	6.78	0.33	0.01	0.000
5	Gm56990	6.76	0.21	0.00	0.000
5	Adam33	6.15	0.32	0.01	0.000
5	Klra2	5.98	0.23	0.01	0.000
5	F630028O10Rik	5.12	0.76	0.04	0.000
5	Gm56663	4.65	0.31	0.02	0.000
5	Ccr1	4.65	0.71	0.06	0.000
5	Cp	4.47	0.63	0.06	0.000
5	Trps1	4.44	0.60	0.06	0.000
5	Oas2	4.18	0.29	0.02	0.000
6	Ifi206	4.77	0.30	0.01	0.000
6	Ifit3b	4.28	0.34	0.01	0.000
6	Mx1	4.21	0.51	0.03	0.000
6	Ifit2	4.09	0.57	0.04	0.000
6	Ifit1	4.02	0.31	0.02	0.000
6	Usp18	4.02	0.44	0.04	0.000
6	Oasl2	3.90	0.83	0.08	0.000
6	Ms4a4c	3.89	0.30	0.03	0.000
6	Rsad2	3.82	0.30	0.02	0.000
6	Zbp1	3.81	0.44	0.04	0.000
7	Ptgds	5.15	0.34	0.02	0.000
7	C4b	4.29	0.48	0.06	0.000
7	Gpx3	4.20	0.65	0.09	0.000
7	Igfbp4	4.19	0.79	0.15	0.000
7	Mgl2	3.81	0.29	0.03	0.000
7	Ednrb	3.33	0.35	0.05	0.000
7	Ccl24	3.20	0.38	0.06	0.000
7	Lyve1	3.19	0.82	0.10	0.000
7	Cd163	3.07	0.81	0.19	0.000
7	Gas7	3.07	0.45	0.10	0.000
8	Gm32006	5.19	0.20	0.01	0.000
8	Setbp1	4.11	0.22	0.04	0.000
8	Cables1	4.10	0.62	0.12	0.000
8	Myo18b	4.05	0.22	0.03	0.000
8	Fhit	3.93	0.69	0.20	0.000
8	Khdrbs3	3.87	0.27	0.05	0.000
8	Plcl1	3.84	0.89	0.30	0.000
8	Smyd3	3.83	0.69	0.17	0.000
8	Vis1	3.79	0.63	0.13	0.000
8	1700028E10Rik	3.78	0.26	0.05	0.000
9	Aqp1	12.62	0.36	0.00	0.000
9	Cspg4	12.22	0.44	0.00	0.000
9	Fn1	10.40	0.67	0.00	0.000
9	Dpysl3	10.13	0.52	0.00	0.000
9	Serpinh1	9.17	0.25	0.00	0.000
9	Tppp3	9.04	0.73	0.00	0.000
9	Mamdc2	9.02	0.21	0.00	0.000
9	Vcan	8.36	0.60	0.00	0.000
9	Atp2b4	8.32	0.40	0.00	0.000
9	Nherf2	7.54	0.64	0.00	0.000
10	Cst7	5.03	0.49	0.03	0.000
10	Chst1	3.20	0.21	0.04	0.000
10	Ctse	2.44	0.26	0.06	0.001
10	Hcar2	2.35	0.21	0.04	0.006
10	Bicd2	2.32	0.23	0.09	1.000
10	Syngr1	2.28	0.93	0.38	0.000
10	Gm38843	2.21	0.28	0.07	0.003
10	Cd52	2.19	0.93	0.54	0.000
10	Tent5c	2.03	0.26	0.12	1.000
10	Hcst	2.00	0.28	0.11	1.000
11	Cxcl2	8.53	0.38	0.00	0.000
11	Gadd45b	6.66	0.73	0.07	0.000
11	Dusp1	6.03	0.81	0.06	0.000
11	Eno2	5.99	0.23	0.00	0.000
11	Tm4sf19	5.82	0.23	0.00	0.000
11	Tnfrsf12a	5.76	0.50	0.02	0.000
11	St18	5.70	0.23	0.01	0.000
11	Gdf15	5.52	0.77	0.12	0.000
11	Tnf	5.45	0.65	0.05	0.000
11	Hbegf	5.39	0.27	0.02	0.000

top_myeloid <- myeloid_markers %>%
  group_by(cluster) %>%
  slice_max(avg_log2FC, n = 8) %>%
  pull(gene) %>% unique()

DoHeatmap(myeloid, features = top_myeloid, size = 3) +
  theme(axis.text.y = element_text(face = "italic", size = 7)) +
  ggtitle("Myeloid Subcluster Markers")

3c — Identity Marker Panels

myeloid_id_genes <- c(
  # Pan-macrophage
  "Adgre1", "Cd68", "Csf1r", "Fcgr1",
  # Homeostatic microglia
  "P2ry12", "Tmem119", "Hexb", "Siglech", "Sall1",
  # BAM
  "Mrc1", "Lyve1", "Cd163", "Siglec1", "Pf4", "Folr2",
  # Monocyte-derived
  "Ly6c2", "Ccr2", "S100a4", "Sell", "Plac8",
  # Tissue-resident
  "Timd4", "Cbr2",
  # LAM / TREM2
  "Trem2", "Apoe", "Axl", "Cd9", "Spp1", "Gpnmb", "Lpl",
  # Inflammatory
  "Tnf", "Il1b", "Nos2", "Ccl2", "Cd86",
  # Tolerogenic
  "Arg1", "Tgfb1", "Il10", "Cd274",
  # Antigen-presenting
  "H2-Eb1", "H2-Ab1", "Cd74", "Ciita",
  # Complement
  "C1qa", "C1qb", "C1qc"
)
myeloid_id_present <- myeloid_id_genes[myeloid_id_genes %in% rownames(myeloid)]

DotPlot(myeloid, features = myeloid_id_present,
        cols = c("lightgrey", "darkred"), dot.scale = 5) +
  RotatedAxis() +
  ggtitle("Myeloid Identity Markers by Subcluster") +
  theme(axis.text.x = element_text(face = "italic", size = 7))

key_myeloid <- c("Adgre1", "Cd68",
                  "P2ry12", "Tmem119", "Hexb", "Sall1",
                  "Mrc1", "Lyve1", "Cd163",
                  "Trem2", "Apoe", "Axl",
                  "Ly6c2", "Ccr2",
                  "C1qa", "Tgfb1", "H2-Eb1", "Cd274")
key_myeloid_present <- key_myeloid[key_myeloid %in% rownames(myeloid)]

FeaturePlot(myeloid, features = key_myeloid_present,
            order = TRUE, ncol = 4,
            cols = c("lightgrey", "darkred")) &
  theme(plot.title = element_text(face = "bold.italic", size = 10))

3d — Myeloid Functional Scoring

myeloid_modules <- list(
  Tolerogenic = c("Tgfb1", "Tgfb2", "Il10", "Arg1", "Mrc1", "Cd163",
                   "Ptgs2", "Nt5e", "Cd274", "Vegfa", "Socs1", "Socs3"),
  Pro_inflammatory = c("Tnf", "Il1b", "Il6", "Nos2", "Ccl2", "Ccl3",
                         "Ccl4", "Cxcl10", "Cd86", "Cd80", "Irf5", "Stat1"),
  Antigen_presentation = c("H2-Eb1", "H2-Ab1", "H2-Aa", "Cd74", "Ciita",
                             "Cd80", "Cd86", "Tap1", "Tap2", "B2m"),
  TREM2_programme = c("Trem2", "Tyrobp", "Apoe", "Axl", "Cd9", "Spp1",
                       "Lpl", "Cst7", "Gpnmb", "Itgax", "Clec7a"),
  Complement = c("C1qa", "C1qb", "C1qc", "C3", "C4b", "Cfh", "Cfp"),
  Homeostatic_MG = c("P2ry12", "Tmem119", "Hexb", "Siglech", "Sall1",
                       "Cx3cr1", "Csf1r"),
  BAM_signature = c("Mrc1", "Lyve1", "Cd163", "Siglec1", "Pf4",
                      "Cbr2", "Ms4a7", "Folr2"),
  Phagocytosis = c("Mertk", "Axl", "Tyro3", "Cd36", "Megf10",
                     "Marco", "Scarb1")
)

myeloid_mods_filt <- lapply(myeloid_modules, function(g) g[g %in% rownames(myeloid)])
myeloid_mods_filt <- myeloid_mods_filt[sapply(myeloid_mods_filt, length) >= 3]

myeloid <- AddModuleScore_UCell(myeloid, features = myeloid_mods_filt, name = "_UCell")

myeloid_score_cols <- grep("_UCell$", colnames(myeloid@meta.data), value = TRUE)

VlnPlot(myeloid, features = myeloid_score_cols,
        group.by = "seurat_clusters", pt.size = 0,
        ncol = 3) &
  theme(axis.text.x = element_text(size = 10))

# Compare original macrophage vs microglia-like labels within CNS
myeloid_cns <- subset(myeloid, Tissue == "CNS")

VlnPlot(myeloid_cns, features = myeloid_score_cols,
        group.by = "original_annotation", pt.size = 0,
        ncol = 3,
        cols = c("Macrophages" = "#FF7F00", "Microglia-like macrophages" = "#984EA3")) &
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))

4. Microglia Validation

Key question: are these “microglia-like” cells true parenchymal microglia (which should NOT be in a dural/meningeal dataset) or border-associated macrophages (BAMs) / CNS-associated macrophages (CAMs) that share some microglial features?

4a — Core Microglia Signature

# Parenchymal microglia require: P2ry12, Tmem119, Hexb, Siglech, Sall1
# BAMs share Cx3cr1/Csf1r but LACK the above core set
# BAMs express: Mrc1 (CD206), Lyve1, Cd163, Siglec1, Pf4

mg_validation <- c(
  # Core parenchymal — must be HIGH for true microglia
  "P2ry12", "Tmem119", "Hexb", "Siglech", "Sall1",
  # BAM markers — expected in border-associated
  "Mrc1", "Lyve1", "Cd163", "Siglec1", "Pf4", "Folr2",
  # Shared (uninformative alone)
  "Cx3cr1", "Csf1r", "Adgre1",
  # DAM / activated microglia
  "Trem2", "Apoe", "Axl", "Cd9", "Spp1",
  # Perivascular macrophage
  "Cd36", "Msr1", "Ms4a7"
)
mg_val_present <- mg_validation[mg_validation %in% rownames(myeloid)]

# DotPlot across myeloid subclusters
DotPlot(myeloid, features = mg_val_present,
        cols = c("lightgrey", "darkred"), dot.scale = 5) +
  RotatedAxis() +
  ggtitle("Microglia vs BAM Validation — All Myeloid Subclusters") +
  theme(axis.text.x = element_text(face = "italic", size = 8))

4b — Microglia-like Cells Specifically

# Compare CNS macrophages vs CNS microglia-like on validation panel
myeloid_cns$annot <- myeloid_cns$original_annotation
Idents(myeloid_cns) <- "annot"

DotPlot(myeloid_cns, features = mg_val_present,
        cols = c("lightgrey", "darkred"), dot.scale = 6) +
  RotatedAxis() +
  ggtitle("CNS Macrophages vs Microglia-like — Validation Panel") +
  theme(axis.text.x = element_text(face = "italic", size = 8))

# Direct violin comparison for the most critical markers
critical_mg <- c("P2ry12", "Tmem119", "Hexb", "Sall1",
                  "Mrc1", "Lyve1", "Cd163", "Trem2",
                 "Siglech", "Fcrls", "Gpr34", "P2ry13")
critical_present <- critical_mg[critical_mg %in% rownames(myeloid_cns)]

VlnPlot(myeloid_cns, features = critical_present,
        group.by = "annot", pt.size = 0.1, ncol = 4,
        cols = c("Macrophages" = "#FF7F00", "Microglia-like macrophages" = "#984EA3")) &
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

4c — Microglia Score vs BAM Score

# Scatter: Homeostatic MG score vs BAM score per cell
# True microglia = high MG, low BAM; BAMs = low MG, high BAM

if (all(c("Homeostatic_MG_UCell", "BAM_signature_UCell") %in% colnames(myeloid@meta.data))) {
  
  myeloid@meta.data %>%
    filter(Tissue == "CNS") %>%
    ggplot(aes(x = BAM_signature_UCell, y = Homeostatic_MG_UCell,
               colour = original_annotation)) +
    geom_point(size = 0.5, alpha = 0.5) +
    scale_colour_manual(values = c("Macrophages" = "#FF7F00",
                                    "Microglia-like macrophages" = "#984EA3")) +
    geom_hline(yintercept = 0.15, linetype = "dashed", colour = "grey50") +
    geom_vline(xintercept = 0.15, linetype = "dashed", colour = "grey50") +
    theme_minimal(base_size = 12) +
    labs(title = "Homeostatic Microglia vs BAM Signature — CNS Myeloid",
         subtitle = "True microglia: top-left; BAMs: bottom-right",
         x = "BAM Signature Score (UCell)", y = "Homeostatic MG Score (UCell)",
         colour = "Original\nAnnotation")
}

4d — Interpretation Guide

cat("
=====================================================================
  MICROGLIA VALIDATION — INTERPRETATION
=====================================================================

  TRUE PARENCHYMAL MICROGLIA would show:
    + High P2ry12, Tmem119, Hexb, Siglech, Sall1
    - Low Mrc1, Lyve1, Cd163
    -> Unexpected in dural/meningeal dataset
    -> Would suggest parenchymal contamination

  BORDER-ASSOCIATED MACROPHAGES (BAMs) would show:
    - Low/absent P2ry12, Tmem119, Sall1
    + High Mrc1, Lyve1, Cd163, Pf4
    + May express Cx3cr1, Trem2 (shared)
    -> Expected in dural/meningeal border niche

  DAM/ACTIVATED MICROGLIA-LIKE would show:
    ~ Reduced P2ry12/Tmem119 (downregulated on activation)
    + High Trem2, Apoe, Axl, Cd9, Spp1
    -> Could be disease-associated transition state

=====================================================================
")

## 
## =====================================================================
##   MICROGLIA VALIDATION — INTERPRETATION
## =====================================================================
## 
##   TRUE PARENCHYMAL MICROGLIA would show:
##     + High P2ry12, Tmem119, Hexb, Siglech, Sall1
##     - Low Mrc1, Lyve1, Cd163
##     -> Unexpected in dural/meningeal dataset
##     -> Would suggest parenchymal contamination
## 
##   BORDER-ASSOCIATED MACROPHAGES (BAMs) would show:
##     - Low/absent P2ry12, Tmem119, Sall1
##     + High Mrc1, Lyve1, Cd163, Pf4
##     + May express Cx3cr1, Trem2 (shared)
##     -> Expected in dural/meningeal border niche
## 
##   DAM/ACTIVATED MICROGLIA-LIKE would show:
##     ~ Reduced P2ry12/Tmem119 (downregulated on activation)
##     + High Trem2, Apoe, Axl, Cd9, Spp1
##     -> Could be disease-associated transition state
## 
## =====================================================================

5. CNS vs BM Macrophage Comparison

5a — CNS Macrophage vs Microglia-like DE

Idents(myeloid_cns) <- "original_annotation"

de_mac_vs_mg <- FindMarkers(myeloid_cns,
                              ident.1 = "Microglia-like macrophages",
                              ident.2 = "Macrophages",
                              test.use = "wilcox",
                              min.pct = 0.1, logfc.threshold = 0)
de_mac_vs_mg$gene <- rownames(de_mac_vs_mg)

cat("CNS Microglia-like vs CNS Macrophages — significant DE genes:",
    sum(de_mac_vs_mg$p_val_adj < 0.05), "\n")

## CNS Microglia-like vs CNS Macrophages — significant DE genes: 4725

cat("  Microglia-like enriched (log2FC > 0.5):",
    sum(de_mac_vs_mg$avg_log2FC > 0.5 & de_mac_vs_mg$p_val_adj < 0.05), "\n")

##   Microglia-like enriched (log2FC > 0.5): 3197

cat("  Macrophage enriched (log2FC < -0.5):",
    sum(de_mac_vs_mg$avg_log2FC < -0.5 & de_mac_vs_mg$p_val_adj < 0.05), "\n")

##   Macrophage enriched (log2FC < -0.5): 1363

cat("=== Top 30 Microglia-like enriched genes ===\n")

## === Top 30 Microglia-like enriched genes ===

de_mac_vs_mg %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  arrange(desc(avg_log2FC)) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))

	gene	avg_log2FC	pct.1	pct.2
Sall1	Sall1	11.50	0.64	0.00
Sall3	Sall3	9.90	0.33	0.00
Jam2	Jam2	9.43	0.26	0.00
Upk1b	Upk1b	9.33	0.19	0.00
Gal3st4	Gal3st4	9.15	0.44	0.00
Gm3739	Gm3739	9.06	0.20	0.00
Yes1	Yes1	8.99	0.19	0.00
H2-Oa	H2-Oa	8.83	0.18	0.00
Gm33100	Gm33100	8.82	0.16	0.00
Asap3	Asap3	8.77	0.16	0.00
Gm43821	Gm43821	8.59	0.14	0.00
Itgb3	Itgb3	8.56	0.15	0.00
Klk8	Klk8	8.56	0.27	0.00
Naalad2	Naalad2	8.56	0.12	0.00
Ecscr	Ecscr	8.43	0.52	0.00
Gm41071	Gm41071	8.37	0.14	0.00
Sdk1	Sdk1	8.24	0.24	0.00
Csmd3	Csmd3	8.08	0.85	0.01
Gm16118	Gm16118	8.01	0.11	0.00
A830008E24Rik	A830008E24Rik	8.00	0.50	0.00
B3galt5	B3galt5	7.98	0.11	0.00
Kcnma1	Kcnma1	7.96	0.25	0.00
6030468B19Rik	6030468B19Rik	7.92	0.10	0.00
Gp9	Gp9	7.92	0.15	0.00
Cd34	Cd34	7.89	0.46	0.00
Kcnd1	Kcnd1	7.77	0.10	0.00
Slc2a5	Slc2a5	7.74	0.50	0.00
Adamts16	Adamts16	7.73	0.11	0.00
Nav3	Nav3	7.51	0.82	0.01
Cacnb2	Cacnb2	7.48	0.54	0.00

cat("\n=== Top 30 Macrophage enriched genes ===\n")

## 
## === Top 30 Macrophage enriched genes ===

de_mac_vs_mg %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  arrange(avg_log2FC) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))

	gene	avg_log2FC	pct.1	pct.2
Acp5	Acp5	-6.28	0.00	0.13
Mgl2	Mgl2	-6.11	0.00	0.14
Clec10a	Clec10a	-6.08	0.00	0.27
Rgs18	Rgs18	-6.07	0.00	0.13
Nxpe5	Nxpe5	-6.01	0.00	0.14
Cd4	Cd4	-5.92	0.00	0.13
Timd4	Timd4	-5.88	0.00	0.13
Fcna	Fcna	-5.87	0.01	0.50
Lyve1	Lyve1	-5.79	0.01	0.38
Cd38	Cd38	-5.70	0.01	0.40
Mrc1	Mrc1	-5.55	0.06	0.66
Cfp	Cfp	-5.47	0.03	0.56
Colec12	Colec12	-5.47	0.00	0.17
Cd163	Cd163	-5.45	0.01	0.42
Cmbl	Cmbl	-5.34	0.00	0.14
Ccl7	Ccl7	-5.24	0.02	0.28
Ccl8	Ccl8	-5.19	0.02	0.53
Ms4a4a	Ms4a4a	-5.17	0.02	0.53
Pla2g2d	Pla2g2d	-5.15	0.00	0.12
Vav3	Vav3	-5.14	0.01	0.17
Gpx3	Gpx3	-5.11	0.01	0.30
Clec4n	Clec4n	-5.06	0.02	0.51
Folr2	Folr2	-5.06	0.03	0.51
Ednrb	Ednrb	-5.05	0.01	0.18
Spic	Spic	-5.01	0.01	0.26
Cd93	Cd93	-4.91	0.01	0.19
Ifi203	Ifi203	-4.88	0.01	0.20
Cd55	Cd55	-4.84	0.00	0.10
Pf4	Pf4	-4.83	0.04	0.76
Siglec1	Siglec1	-4.82	0.02	0.27

mg_mac_labels <- c("P2ry12", "Tmem119", "Hexb", "Siglech", "Sall1",
                     "Mrc1", "Lyve1", "Cd163", "Pf4", "Folr2",
                     "Trem2", "Apoe", "Axl", "Cd9", "Spp1",
                     "C1qa", "C1qb", "Tgfb1", "Il10",
                     "H2-Eb1", "Cd74", "Ccr2", "Ly6c2",
                     "Adgre1", "Cd68", "Csf1r")

de_mac_vs_mg_plot <- de_mac_vs_mg %>%
  mutate(
    sig = case_when(
      p_val_adj < 0.05 & avg_log2FC > 0.5  ~ "Microglia-like macrophages",
      p_val_adj < 0.05 & avg_log2FC < -0.5 ~ "Macrophage",
      TRUE ~ "NS"
    ),
    label = ifelse(gene %in% mg_mac_labels |
                     (p_val_adj < 1e-10 & abs(avg_log2FC) > 1.5),
                   gene, "")
  )

ggplot(de_mac_vs_mg_plot, aes(x = avg_log2FC, y = -log10(p_val_adj),
                               colour = sig, label = label)) +
  geom_point(size = 0.8, alpha = 0.5) +
  geom_text_repel(size = 3, max.overlaps = 30,
                   fontface = "italic", segment.size = 0.2) +
  scale_colour_manual(values = c("Microglia-like macrophages" = "#984EA3",
                                  "Macrophage" = "#FF7F00", "NS" = "grey80")) +
  geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", colour = "grey60") +
  theme_minimal(base_size = 12) +
  labs(title = "CNS: Microglia-like vs Macrophages",
       x = "log2FC (+ = Microglia-like)", y = "-log10(padj)")

6. CNS vs BM Differential Expression + GO

6a — Stroma: CNS vs BM

Idents(stroma) <- "Tissue"

de_stroma <- FindMarkers(stroma,
                          ident.1 = "CNS", ident.2 = "BM",
                          test.use = "wilcox",
                          min.pct = 0.1, logfc.threshold = 0)
de_stroma$gene <- rownames(de_stroma)

cat("Stroma — significant DE genes (padj < 0.05):", sum(de_stroma$p_val_adj < 0.05), "\n")

## Stroma — significant DE genes (padj < 0.05): 5656

cat("  CNS-enriched (log2FC > 0.5):",
    sum(de_stroma$avg_log2FC > 0.5 & de_stroma$p_val_adj < 0.05), "\n")

##   CNS-enriched (log2FC > 0.5): 2152

cat("  BM-enriched (log2FC < -0.5):",
    sum(de_stroma$avg_log2FC < -0.5 & de_stroma$p_val_adj < 0.05), "\n")

##   BM-enriched (log2FC < -0.5): 3174

cat("=== Top 30 CNS-enriched stroma genes ===\n")

## === Top 30 CNS-enriched stroma genes ===

de_stroma %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  arrange(desc(avg_log2FC)) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))

	gene	avg_log2FC	pct.1
Penk	Penk	12.89	0.56
Prok2	Prok2	12.61	0.69
Slc47a1	Slc47a1	11.57	0.78
Mmp3	Mmp3	11.45	0.19
Slc4a10	Slc4a10	11.29	0.86
Mfap4	Mfap4	10.89	0.47
Ptgdr	Ptgdr	10.87	0.72
Ccn3	Ccn3	10.70	0.75
Cpxm2	Cpxm2	10.59	0.62
Reg3g	Reg3g	10.57	0.24
Prmt8	Prmt8	10.54	0.62
Dcc	Dcc	10.47	0.53
Cpz	Cpz	10.45	0.65
Zic1	Zic1	10.41	0.70
Cldn11	Cldn11	10.37	0.59
Fmod	Fmod	10.07	0.36
Shisa3	Shisa3	10.02	0.79
Il33	Il33	10.01	0.74
Ildr2	Ildr2	9.92	0.84
Igfbp6	Igfbp6	9.92	0.87
Mpzl2	Mpzl2	9.80	0.54
Wfdc1	Wfdc1	9.66	0.58
Tspan11	Tspan11	9.65	0.85
Foxd1	Foxd1	9.59	0.78
Gjb6	Gjb6	9.46	0.41
Angptl7	Angptl7	9.31	0.22
Ctxn3	Ctxn3	9.30	0.46
Hhip	Hhip	9.26	0.36
Zic4	Zic4	9.26	0.47
Npy1r	Npy1r	9.24	0.40

cat("\n=== Top 30 BM-enriched stroma genes ===\n")

## 
## === Top 30 BM-enriched stroma genes ===

de_stroma %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  arrange(avg_log2FC) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))

	gene	avg_log2FC	pct.1	pct.2
Ibsp	Ibsp	-10.41	0.00	0.19
Mpo	Mpo	-9.87	0.00	0.20
Il1rn	Il1rn	-9.84	0.00	0.19
Adgrl4	Adgrl4	-9.48	0.00	0.46
Kng1	Kng1	-8.48	0.00	0.31
Cpa6	Cpa6	-8.36	0.00	0.14
Podnl1	Podnl1	-8.32	0.00	0.18
Gm32554	Gm32554	-8.11	0.00	0.12
Ctsg	Ctsg	-8.09	0.00	0.13
Hoxa10	Hoxa10	-7.87	0.00	0.22
Hoxa3	Hoxa3	-7.86	0.00	0.26
Shox2	Shox2	-7.79	0.00	0.14
Gna15	Gna15	-7.70	0.00	0.23
Mme	Mme	-7.68	0.00	0.30
Meis1	Meis1	-7.57	0.01	0.56
Fst	Fst	-7.54	0.00	0.14
9030619P08Rik	9030619P08Rik	-7.51	0.00	0.22
Trarg1	Trarg1	-7.45	0.00	0.10
Padi4	Padi4	-7.43	0.00	0.20
Hoxa7	Hoxa7	-7.42	0.00	0.38
Gdpd2	Gdpd2	-7.40	0.00	0.24
Siglech	Siglech	-7.31	0.00	0.11
Gm56622	Gm56622	-7.29	0.00	0.25
Sh2d5	Sh2d5	-7.27	0.00	0.24
Gm47754	Gm47754	-7.23	0.00	0.18
Hmga2	Hmga2	-7.20	0.00	0.21
C6	C6	-7.19	0.00	0.14
Dntt	Dntt	-7.13	0.01	0.25
Cd27	Cd27	-7.11	0.01	0.42
Pparg	Pparg	-7.08	0.01	0.20

niche_genes <- c("Cxcl12", "Cxcl16", "Ptn", "Mdk", "Kitl", "Fn1",
                  "Col1a1", "Col3a1", "Igf1", "Tgfb1", "Tgfb2",
                  "Il33", "Wnt5a", "App", "Apoe", "Lama1",
                  "Pdgfra", "Pdgfrb", "Pecam1", "Rgs5",
                  "Mmp2", "Mmp14", "Postn", "Tnc",
                  "H2-Eb1", "Cd74", "Ptgds")

de_stroma_plot <- de_stroma %>%
  mutate(
    sig = case_when(
      p_val_adj < 0.05 & avg_log2FC > 0.5  ~ "CNS-enriched",
      p_val_adj < 0.05 & avg_log2FC < -0.5 ~ "BM-enriched",
      TRUE ~ "NS"
    ),
    label = ifelse(gene %in% niche_genes |
                     (p_val_adj < 1e-20 & abs(avg_log2FC) > 2),
                   gene, "")
  )

ggplot(de_stroma_plot, aes(x = avg_log2FC, y = -log10(p_val_adj),
                            colour = sig, label = label)) +
  geom_point(size = 0.8, alpha = 0.5) +
  geom_text_repel(size = 3, max.overlaps = 30,
                   fontface = "italic", segment.size = 0.2) +
  scale_colour_manual(values = c("CNS-enriched" = "#B2182B",
                                  "BM-enriched" = "#2166AC", "NS" = "grey80")) +
  geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", colour = "grey60") +
  theme_minimal(base_size = 12) +
  labs(title = "Stroma: CNS vs BM", x = "log2FC (+ = CNS)", y = "-log10(padj)")

6b — Stroma GO Enrichment

# CNS-enriched
stroma_cns_genes <- de_stroma %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  pull(gene)

# BM-enriched
stroma_bm_genes <- de_stroma %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  pull(gene)

if (length(stroma_cns_genes) >= 5) {
  go_stroma_cns <- gost(stroma_cns_genes,
                         organism = "mmusculus",
                         sources = "GO:BP",
                         evcodes = TRUE,
                         significant = TRUE)
  
  if (!is.null(go_stroma_cns$result) && nrow(go_stroma_cns$result) > 0) {
    
    # Also show top terms as table
    go_stroma_cns$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — Stroma CNS-Enriched")
  } else {
    cat("No significant GO terms for CNS-enriched stroma genes\n")
  }
} else {
  cat("Too few CNS-enriched stroma genes for GO analysis\n")
}

Top 20 GO:BP — Stroma CNS-Enriched
term_name	term_size	intersection_size
system development	4155	641
multicellular organism development	4907	707
localization	5309	741
anatomical structure development	6280	818
developmental process	6873	868
transport	4364	625
establishment of localization	4666	653
positive regulation of biological process	6447	804
anatomical structure morphogenesis	2781	448
positive regulation of cellular process	6095	762
cellular localization	3493	506
animal organ development	3272	480
regulation of cell communication	3560	502
regulation of cellular component organization	2537	397
regulation of signaling	3558	501
regulation of signal transduction	2940	438
cell surface receptor signaling pathway	2731	413
nervous system development	2593	398
cell migration	1539	274
macromolecule localization	3054	436

if (length(stroma_bm_genes) >= 5) {
  go_stroma_bm <- gost(stroma_bm_genes,
                         organism = "mmusculus",
                         sources = "GO:BP",
                         evcodes = TRUE,
                         significant = TRUE)
  
  if (!is.null(go_stroma_bm$result) && nrow(go_stroma_bm$result) > 0) {
    
    go_stroma_bm$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — Stroma BM-Enriched")
  } else {
    cat("No significant GO terms for BM-enriched stroma genes\n")
  }
} else {
  cat("Too few BM-enriched stroma genes for GO analysis\n")
}

Top 20 GO:BP — Stroma BM-Enriched
term_name	term_size	intersection_size
positive regulation of biological process	6447	1303
positive regulation of cellular process	6095	1247
regulation of primary metabolic process	5176	1111
regulation of metabolic process	6679	1316
regulation of macromolecule metabolic process	6153	1240
positive regulation of metabolic process	3493	836
positive regulation of macromolecule metabolic process	3174	782
regulation of nucleobase-containing compound metabolic process	3842	882
regulation of biosynthetic process	5622	1137
regulation of macromolecule biosynthetic process	5426	1102
regulation of gene expression	5318	1084
negative regulation of cellular process	5540	1112
negative regulation of biological process	5759	1140
regulation of biological process	13108	2029
regulation of cellular process	12715	1983
biological regulation	13503	2067
positive regulation of biosynthetic process	2697	671
positive regulation of macromolecule biosynthetic process	2571	650
regulation of RNA metabolic process	3545	796
chromatin organization	842	320

6c — Macrophages: CNS vs BM

mac_only <- subset(myeloid, original_annotation == "Macrophages")
Idents(mac_only) <- "Tissue"

de_mac <- FindMarkers(mac_only,
                       ident.1 = "CNS", ident.2 = "BM",
                       test.use = "wilcox",
                       min.pct = 0.1, logfc.threshold = 0)
de_mac$gene <- rownames(de_mac)

cat("Macrophages — significant DE genes:", sum(de_mac$p_val_adj < 0.05), "\n")

## Macrophages — significant DE genes: 1300

cat("  CNS-enriched:", sum(de_mac$avg_log2FC > 0.5 & de_mac$p_val_adj < 0.05), "\n")

##   CNS-enriched: 738

cat("  BM-enriched:", sum(de_mac$avg_log2FC < -0.5 & de_mac$p_val_adj < 0.05), "\n")

##   BM-enriched: 419

cat("=== Top 30 CNS-enriched macrophage genes ===\n")

## === Top 30 CNS-enriched macrophage genes ===

de_mac %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  arrange(desc(avg_log2FC)) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))

	gene	avg_log2FC	pct.1	pct.2
Ptgds	Ptgds	10.29	0.10	0.00
Fcrls	Fcrls	7.02	0.29	0.01
Mgl2	Mgl2	6.35	0.14	0.00
Smim1	Smim1	4.88	0.10	0.00
Cd209g	Cd209g	4.65	0.16	0.01
Igfbp4	Igfbp4	4.64	0.38	0.04
Grap	Grap	4.07	0.23	0.02
Rgs18	Rgs18	4.06	0.13	0.01
Tln2	Tln2	4.06	0.11	0.01
Afap1l1	Afap1l1	4.04	0.11	0.01
F13a1	F13a1	3.92	0.74	0.08
Mbp	Mbp	3.77	0.12	0.02
Cd209f	Cd209f	3.64	0.23	0.04
Mctp1	Mctp1	3.55	0.18	0.06
Ighm	Ighm	3.50	0.18	0.03
Lpar6	Lpar6	3.48	0.10	0.01
Marcksl1	Marcksl1	3.40	0.31	0.06
Ccnd1	Ccnd1	3.30	0.40	0.07
Gpr183	Gpr183	3.25	0.15	0.03
Inka1	Inka1	3.22	0.12	0.02
Ctla2b	Ctla2b	3.22	0.33	0.06
Akr1b8	Akr1b8	3.19	0.30	0.06
Cbr2	Cbr2	3.16	0.77	0.17
Rbpj	Rbpj	3.05	0.44	0.11
Cx3cr1	Cx3cr1	3.04	0.14	0.04
Ednrb	Ednrb	3.03	0.18	0.03
Etv1	Etv1	2.97	0.16	0.03
Lifr	Lifr	2.86	0.13	0.02
Dab2	Dab2	2.73	0.82	0.30
Sema4a	Sema4a	2.72	0.10	0.02

cat("\n=== Top 30 BM-enriched macrophage genes ===\n")

## 
## === Top 30 BM-enriched macrophage genes ===

de_mac %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  arrange(avg_log2FC) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(30) %>%
  kable(digits = c(NA, 2, 2, 2, 3))

	gene	avg_log2FC	pct.1	pct.2
Paqr9	Paqr9	-4.34	0.01	0.16
Tmem26	Tmem26	-4.17	0.01	0.16
Ear2	Ear2	-4.08	0.04	0.50
Itgad	Itgad	-4.00	0.03	0.31
Gpd1	Gpd1	-3.94	0.02	0.21
Tspan10	Tspan10	-3.84	0.01	0.13
Cd5l	Cd5l	-3.81	0.09	0.70
Ubd	Ubd	-3.79	0.01	0.13
Treml4	Treml4	-3.69	0.01	0.11
Kcna2	Kcna2	-3.46	0.01	0.11
Pilrb1	Pilrb1	-3.45	0.02	0.21
Kcnj10	Kcnj10	-3.33	0.01	0.13
Ccr3	Ccr3	-3.31	0.03	0.31
Fgr	Fgr	-3.29	0.04	0.31
Mrap	Mrap	-3.28	0.04	0.30
Gfra2	Gfra2	-3.23	0.01	0.10
Clec1b	Clec1b	-2.89	0.03	0.22
Myo10	Myo10	-2.87	0.08	0.40
C3	C3	-2.81	0.03	0.15
Inf2	Inf2	-2.81	0.02	0.12
Crip2	Crip2	-2.80	0.05	0.28
Stab2	Stab2	-2.80	0.02	0.14
Vcam1	Vcam1	-2.79	0.33	0.88
Sdc3	Sdc3	-2.78	0.23	0.72
Pilra	Pilra	-2.74	0.07	0.39
Slc16a9	Slc16a9	-2.68	0.02	0.13
Itgax	Itgax	-2.66	0.01	0.10
Card11	Card11	-2.65	0.02	0.16
Sema6d	Sema6d	-2.65	0.03	0.14
Ccdc148	Ccdc148	-2.63	0.02	0.15

mac_label_genes <- c("Trem2", "Apoe", "Axl", "Cd9", "Spp1", "Gpnmb",
                      "Mrc1", "Lyve1", "Cd163", "Arg1",
                      "H2-Eb1", "H2-Ab1", "Cd74", "Ciita",
                      "Tnf", "Il1b", "Nos2", "Ccl2",
                      "C1qa", "C1qb", "Tgfb1", "Igf1",
                      "Lyz2", "Ly6c2", "Ccr2", "S100a4")

de_mac_plot <- de_mac %>%
  mutate(
    sig = case_when(
      p_val_adj < 0.05 & avg_log2FC > 0.5  ~ "CNS-enriched",
      p_val_adj < 0.05 & avg_log2FC < -0.5 ~ "BM-enriched",
      TRUE ~ "NS"
    ),
    label = ifelse(gene %in% mac_label_genes |
                     (p_val_adj < 1e-20 & abs(avg_log2FC) > 2),
                   gene, "")
  )

ggplot(de_mac_plot, aes(x = avg_log2FC, y = -log10(p_val_adj),
                         colour = sig, label = label)) +
  geom_point(size = 0.8, alpha = 0.5) +
  geom_text_repel(size = 3, max.overlaps = 30,
                   fontface = "italic", segment.size = 0.2) +
  scale_colour_manual(values = c("CNS-enriched" = "#B2182B",
                                  "BM-enriched" = "#2166AC", "NS" = "grey80")) +
  geom_vline(xintercept = c(-0.5, 0.5), linetype = "dashed", colour = "grey60") +
  theme_minimal(base_size = 12) +
  labs(title = "Macrophages: CNS vs BM", x = "log2FC (+ = CNS)", y = "-log10(padj)")

6d — Macrophage GO Enrichment

mac_cns_genes <- de_mac %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  pull(gene)

mac_bm_genes <- de_mac %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  pull(gene)

if (length(mac_cns_genes) >= 5) {
  go_mac_cns <- gost(mac_cns_genes,
                      organism = "mmusculus",
                      sources = "GO:BP",
                      evcodes = TRUE,
                      significant = TRUE)
  
  if (!is.null(go_mac_cns$result) && nrow(go_mac_cns$result) > 0) {
    
    go_mac_cns$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — Macrophage CNS-Enriched")
  } else {
    cat("No significant GO terms for CNS-enriched macrophage genes\n")
  }
} else {
  cat("Too few CNS-enriched macrophage genes for GO analysis\n")
}

Top 20 GO:BP — Macrophage CNS-Enriched
term_name	term_size	intersection_size
vesicle-mediated transport	1524	125
response to stress	3917	209
transport	4364	218
establishment of localization	4666	226
endocytosis	686	68
cellular localization	3493	184
positive regulation of biological process	6447	281
establishment of localization in cell	2036	128
negative regulation of biological process	5759	258
biological regulation	13503	478
positive regulation of cellular process	6095	267
regulation of biological process	13108	466
localization	5309	241
regulation of cellular process	12715	452
negative regulation of cellular process	5540	245
cellular process	21471	651
intracellular transport	1337	92
endosomal transport	269	37
immune system process	2848	149
import into cell	900	70

if (length(mac_bm_genes) >= 5) {
  go_mac_bm <- gost(mac_bm_genes,
                      organism = "mmusculus",
                      sources = "GO:BP",
                      evcodes = TRUE,
                      significant = TRUE)
  
  if (!is.null(go_mac_bm$result) && nrow(go_mac_bm$result) > 0) {
    
    go_mac_bm$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — Macrophage BM-Enriched")
  } else {
    cat("No significant GO terms for BM-enriched macrophage genes\n")
  }
} else {
  cat("Too few BM-enriched macrophage genes for GO analysis\n")
}

Top 20 GO:BP — Macrophage BM-Enriched
term_name	term_size	intersection_size
localization	5309	196
transport	4364	171
establishment of localization	4666	174
import into cell	900	69
immune system process	2848	119
endocytosis	686	53
developmental process	6873	194
anatomical structure development	6280	183
response to stress	3917	136
cell migration	1539	78
cell motility	1804	83
transmembrane transport	1525	75
regulation of immune system process	1586	76
response to stimulus	10427	246
phagocytosis	221	29
regulation of cell migration	978	57
homeostatic process	1824	80
monoatomic ion transport	1235	64
positive regulation of biological process	6447	176
regulation of cell motility	1034	58

6e — CNS Microglia-like vs Macrophage GO

mg_enriched <- de_mac_vs_mg %>%
  filter(p_val_adj < 0.05, avg_log2FC > 0.5) %>%
  pull(gene)

mac_enriched <- de_mac_vs_mg %>%
  filter(p_val_adj < 0.05, avg_log2FC < -0.5) %>%
  pull(gene)

if (length(mg_enriched) >= 5) {
  go_mg <- gost(mg_enriched,
                 organism = "mmusculus",
                 sources = "GO:BP",
                 evcodes = TRUE,
                 significant = TRUE)
  
  if (!is.null(go_mg$result) && nrow(go_mg$result) > 0) {
    
    go_mg$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — Microglia-like Enriched")
  }
}

Top 20 GO:BP — Microglia-like Enriched
term_name	term_size	intersection_size
positive regulation of biological process	6447	1282
positive regulation of cellular process	6095	1231
regulation of primary metabolic process	5176	1098
regulation of metabolic process	6679	1287
positive regulation of metabolic process	3493	815
protein metabolic process	4467	953
macromolecule modification	2695	676
protein modification process	2540	650
regulation of macromolecule metabolic process	6153	1187
positive regulation of macromolecule metabolic process	3174	750
biological regulation	13503	2073
regulation of biological process	13108	2021
regulation of cellular process	12715	1974
regulation of nucleobase-containing compound metabolic process	3842	823
regulation of biosynthetic process	5622	1072
regulation of macromolecule biosynthetic process	5426	1034
intracellular signal transduction	2810	648
localization	5309	1015
regulation of gene expression	5318	1016
regulation of RNA metabolic process	3545	757

if (length(mac_enriched) >= 5) {
  go_mac_enr <- gost(mac_enriched,
                       organism = "mmusculus",
                       sources = "GO:BP",
                       evcodes = TRUE,
                       significant = TRUE)
  
  if (!is.null(go_mac_enr$result) && nrow(go_mac_enr$result) > 0) {
    
    go_mac_enr$result %>%
      filter(source == "GO:BP") %>%
      arrange(p_value) %>%
      head(20) %>%
      select(term_name, p_value, term_size, intersection_size) %>%
      kable(digits = c(NA, 3, 0, 0),
            caption = "Top 20 GO:BP — CNS Macrophage Enriched")
  }
}

Top 20 GO:BP — CNS Macrophage Enriched
term_name	term_size	intersection_size
translation	716	159
cytoplasmic translation	167	80
generation of precursor metabolites and energy	468	123
aerobic respiration	200	78
cellular respiration	245	85
small molecule metabolic process	1762	238
energy derivation by oxidation of organic compounds	347	98
oxidative phosphorylation	145	64
organelle organization	3494	355
catabolic process	2530	288
protein metabolic process	4467	417
cellular process	21471	1203
nucleoside phosphate metabolic process	639	125
nucleobase-containing small molecule metabolic process	666	127
translation at synapse	52	38
translation at postsynapse	52	38
purine-containing compound metabolic process	571	113
purine nucleotide metabolic process	428	97
nucleotide metabolic process	484	103
translation at presynapse	51	37

7. CellChat Validation on Niche Cells

7a — TREM2/APP Axis

trem2_genes <- c("Trem2", "Tyrobp", "App", "Apoe", "Cd74", "Sort1")
trem2_present <- trem2_genes[trem2_genes %in% rownames(myeloid)]

# On myeloid subclusters
VlnPlot(myeloid, features = trem2_present,
        group.by = "seurat_clusters", split.by = "Tissue",
        pt.size = 0, ncol = 3,
        cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
  theme(axis.text.x = element_text(size = 9))

7b — Retention Chemokines (on Stroma)

retention_genes <- c("Cxcl12", "Cxcl16", "Vcam1", "Icam1",
                      "Ccl2", "Ccl7", "Cxcl14", "Kitl")
retention_present <- retention_genes[retention_genes %in% rownames(stroma)]

VlnPlot(stroma, features = retention_present,
        group.by = "seurat_clusters", split.by = "Tissue",
        pt.size = 0, ncol = 4,
        cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
  theme(axis.text.x = element_text(size = 9))

7c — Immunosuppressive Molecules

immunosupp <- c("Lgals9", "Cd274", "Pdcd1lg2", "Tgfb1",
                 "Il10", "Arg1", "Nt5e", "Ptgs2")
immunosupp_present <- immunosupp[immunosupp %in% rownames(myeloid)]

VlnPlot(myeloid, features = immunosupp_present,
        group.by = "seurat_clusters", split.by = "Tissue",
        pt.size = 0, ncol = 4,
        cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
  theme(axis.text.x = element_text(size = 9))

7d — IL-17 Receptors on Niche Cells

il17r_genes <- c("Il17ra", "Il17rb", "Il17rc", "Il17rd", "Il17re")

# Check on stroma
il17r_stroma <- il17r_genes[il17r_genes %in% rownames(stroma)]
if (length(il17r_stroma) > 0) {
  print(VlnPlot(stroma, features = il17r_stroma,
                group.by = "seurat_clusters", split.by = "Tissue",
                pt.size = 0, ncol = 3,
                cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
          theme(axis.text.x = element_text(size = 9)) &
          ggtitle("IL-17R — Stroma"))
}

# Check on myeloid
il17r_myeloid <- il17r_genes[il17r_genes %in% rownames(myeloid)]
if (length(il17r_myeloid) > 0) {
  print(VlnPlot(myeloid, features = il17r_myeloid,
                group.by = "seurat_clusters", split.by = "Tissue",
                pt.size = 0, ncol = 3,
                cols = c("BM" = "#2166AC", "CNS" = "#B2182B")) &
          theme(axis.text.x = element_text(size = 9)) &
          ggtitle("IL-17R — Myeloid"))
}

8. Export

# DE results
write.csv(de_stroma, file.path(cache_dir, "13b_DE_stroma_CNS_vs_BM.csv"),
          row.names = FALSE)
write.csv(de_mac, file.path(cache_dir, "13b_DE_macrophages_CNS_vs_BM.csv"),
          row.names = FALSE)
write.csv(de_mac_vs_mg, file.path(cache_dir, "13b_DE_CNS_macrophages_vs_microglia.csv"),
          row.names = FALSE)

# Cluster markers
write.csv(stroma_markers, file.path(cache_dir, "13b_stroma_subcluster_markers.csv"),
          row.names = FALSE)
write.csv(myeloid_markers, file.path(cache_dir, "13b_myeloid_subcluster_markers.csv"),
          row.names = FALSE)

# GO results (if they exist)

# Helper to flatten gprofiler2 list columns for CSV export
write_gost <- function(gost_obj, path) {
  if (!is.null(gost_obj$result)) {
    df <- gost_obj$result %>%
      select(-any_of(c("parents", "intersection")))
    write.csv(df, path, row.names = FALSE)
  }
}

if (exists("go_stroma_cns")) write_gost(go_stroma_cns, file.path(cache_dir, "13b_GO_stroma_CNS_enriched.csv"))
if (exists("go_stroma_bm")) write_gost(go_stroma_bm, file.path(cache_dir, "13b_GO_stroma_BM_enriched.csv"))
if (exists("go_mac_cns")) write_gost(go_mac_cns, file.path(cache_dir, "13b_GO_macrophage_CNS_enriched.csv"))
if (exists("go_mac_bm")) write_gost(go_mac_bm, file.path(cache_dir, "13b_GO_macrophage_BM_enriched.csv"))
if (exists("go_mg")) write_gost(go_mg, file.path(cache_dir, "13b_GO_microglia_vs_mac.csv"))

# Save subclustered objects
qs_save(stroma, file.path(cache_dir, "13b_stroma_subclustered.qs"))
qs_save(myeloid, file.path(cache_dir, "13b_myeloid_subclustered.qs"))

cat("All results saved to:", cache_dir, "\n")

## All results saved to: /exports/eddie/scratch/aduguid3/harmony_clustering

Session Information

sessionInfo()

## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Rocky Linux 9.5 (Blue Onyx)
## 
## Matrix products: default
## BLAS/LAPACK: /opt/intel/oneapi/mkl/2024.0/lib/libmkl_gf_lp64.so.2;  LAPACK version 3.10.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/London
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] UCell_2.10.1          knitr_1.51            circlize_0.4.17      
##  [4] gprofiler2_0.2.4      ComplexHeatmap_2.22.0 patchwork_1.3.2      
##  [7] ggrepel_0.9.6         ggplot2_4.0.2         tidyr_1.3.1          
## [10] dplyr_1.1.4           qs2_0.1.7             Seurat_5.4.0         
## [13] SeuratObject_5.3.0    sp_2.1-4             
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.23            splines_4.4.1              
##   [3] later_1.4.7                 bitops_1.0-9               
##   [5] tibble_3.3.1                polyclip_1.10-7            
##   [7] fastDummies_1.7.5           lifecycle_1.0.5            
##   [9] doParallel_1.0.17           globals_0.16.3             
##  [11] lattice_0.22-6              MASS_7.3-60.2              
##  [13] magrittr_2.0.3              limma_3.62.2               
##  [15] plotly_4.12.0               sass_0.4.9                 
##  [17] rmarkdown_2.30              jquerylib_0.1.4            
##  [19] yaml_2.3.12                 httpuv_1.6.16              
##  [21] otel_0.2.0                  sctransform_0.4.3          
##  [23] spam_2.11-3                 spatstat.sparse_3.1-0      
##  [25] reticulate_1.45.0           cowplot_1.2.0              
##  [27] pbapply_1.7-4               RColorBrewer_1.1-3         
##  [29] abind_1.4-5                 zlibbioc_1.52.0            
##  [31] Rtsne_0.17                  GenomicRanges_1.58.0       
##  [33] purrr_1.0.2                 presto_1.0.0               
##  [35] RCurl_1.98-1.17             BiocGenerics_0.52.0        
##  [37] GenomeInfoDbData_1.2.13     IRanges_2.40.1             
##  [39] S4Vectors_0.44.0            irlba_2.3.7                
##  [41] listenv_0.9.1               spatstat.utils_3.2-1       
##  [43] goftest_1.2-3               RSpectra_0.16-2            
##  [45] spatstat.random_3.4-4       fitdistrplus_1.2-6         
##  [47] parallelly_1.37.1           codetools_0.2-20           
##  [49] DelayedArray_0.32.0         tidyselect_1.2.1           
##  [51] shape_1.4.6.1               UCSC.utils_1.2.0           
##  [53] farver_2.1.2                matrixStats_1.5.0          
##  [55] stats4_4.4.1                spatstat.explore_3.7-0     
##  [57] jsonlite_2.0.0              GetoptLong_1.1.0           
##  [59] BiocNeighbors_2.0.1         progressr_0.18.0           
##  [61] ggridges_0.5.6              survival_3.6-4             
##  [63] iterators_1.0.14            foreach_1.5.2              
##  [65] tools_4.4.1                 ica_1.0-3                  
##  [67] Rcpp_1.0.12                 glue_1.8.0                 
##  [69] gridExtra_2.3               SparseArray_1.6.2          
##  [71] xfun_0.56                   MatrixGenerics_1.18.1      
##  [73] GenomeInfoDb_1.42.3         withr_3.0.2                
##  [75] fastmap_1.2.0               fansi_1.0.6                
##  [77] digest_0.6.35               R6_2.6.1                   
##  [79] mime_0.12                   colorspace_2.1-0           
##  [81] scattermore_1.2             tensor_1.5.1               
##  [83] dichromat_2.0-0.1           spatstat.data_3.1-9        
##  [85] utf8_1.2.4                  generics_0.1.3             
##  [87] data.table_1.15.4           httr_1.4.7                 
##  [89] htmlwidgets_1.6.4           S4Arrays_1.6.0             
##  [91] uwot_0.2.4                  pkgconfig_2.0.3            
##  [93] gtable_0.3.6                lmtest_0.9-40              
##  [95] S7_0.2.1                    SingleCellExperiment_1.28.1
##  [97] XVector_0.46.0              htmltools_0.5.8.1          
##  [99] dotCall64_1.2               clue_0.3-67                
## [101] scales_1.4.0                Biobase_2.66.0             
## [103] png_0.1-8                   spatstat.univar_3.1-6      
## [105] rstudioapi_0.16.0           reshape2_1.4.4             
## [107] rjson_0.2.23                curl_7.0.0                 
## [109] nlme_3.1-164                cachem_1.1.0               
## [111] zoo_1.8-15                  GlobalOptions_0.1.3        
## [113] stringr_1.5.1               KernSmooth_2.23-24         
## [115] vipor_0.4.7                 parallel_4.4.1             
## [117] miniUI_0.1.2                ggrastr_1.0.2              
## [119] pillar_1.9.0                vctrs_0.6.5                
## [121] RANN_2.6.2                  promises_1.5.0             
## [123] stringfish_0.18.0           xtable_1.8-8               
## [125] cluster_2.1.6               beeswarm_0.4.0             
## [127] evaluate_1.0.5              cli_3.6.5                  
## [129] compiler_4.4.1              rlang_1.1.7                
## [131] crayon_1.5.2                future.apply_1.11.2        
## [133] labeling_0.4.3              ggbeeswarm_0.7.3           
## [135] plyr_1.8.9                  stringi_1.8.4              
## [137] viridisLite_0.4.2           deldir_2.0-4               
## [139] BiocParallel_1.40.2         lazyeval_0.2.2             
## [141] spatstat.geom_3.7-0         Matrix_1.7-0               
## [143] RcppHNSW_0.6.0              future_1.33.2              
## [145] statmod_1.5.1               shiny_1.13.0               
## [147] SummarizedExperiment_1.36.0 ROCR_1.0-12                
## [149] igraph_2.2.2                RcppParallel_5.1.8         
## [151] bslib_0.7.0

9: Niche Cell Characterisation — Stroma, Macrophages, Microglia-like macrophages

Marker panels, CNS vs BM DE, and functional module scoring

2026-02-25