Setup and Load Seurat Object
Show code
library (Seurat)
library (tidyverse)
library (patchwork)
library (harmony)
library (ggrepel)
set.seed (1234 )
project_dir <- "/Users/yoshimurasouhei/Downloads/010_school/4年生/bioinfomaticsリサーチクラークシップ/PD_2026"
fernando_path <- file.path (
project_dir,
"lncRNAs_PD_Fer" ,
"snRNAseq" ,
"seurat_object_annotated.rds"
)
results_dir <- file.path (
project_dir,
"results" ,
"Step13_microglia_harmony_reclustering"
)
dir.create (results_dir, recursive = TRUE , showWarnings = FALSE )
if (! file.exists (fernando_path)) {
stop ("Fernando-annotated Seurat object not found at: " , fernando_path)
}
SO <- readRDS (fernando_path)
SO
An object of class Seurat
87674 features across 46066 samples within 2 assays
Active assay: SCT (39901 features, 3000 variable features)
3 layers present: counts, data, scale.data
1 other assay present: RNA
3 dimensional reductions calculated: pca, umap, harmony
Inspect Full-Object UMAP
Show code
available_reductions <- Reductions (SO)
available_reductions
full_umap_reduction <- case_when (
"umap_rpca" %in% available_reductions ~ "umap_rpca" ,
"umap" %in% available_reductions ~ "umap" ,
TRUE ~ NA_character_
)
if (! is.na (full_umap_reduction)) {
DimPlot (
SO,
reduction = full_umap_reduction,
group.by = "Cell_types" ,
label = TRUE ,
repel = TRUE ,
pt.size = 0.4
) +
labs (title = "Full Object: Cell-Type Annotations" )
} else {
message ("No existing UMAP reduction found. Skipping full-object DimPlot." )
}
Sample-Level Microglia Fraction
The fraction is computed from the full object so that the denominator is total cells per sample, not microglia-only cells.
Show code
# Flag microglia in the full object
SO$ is_microglia <- SO$ fernando_celltype_annotation %in% target_microglia
# Determine which gene-count column exists
feature_col <- case_when (
"nFeature_RNA" %in% colnames (SO@ meta.data) ~ "nFeature_RNA" ,
"nFeature_SCT" %in% colnames (SO@ meta.data) ~ "nFeature_SCT" ,
TRUE ~ NA_character_
)
if (is.na (feature_col)) {
stop ("Neither nFeature_RNA nor nFeature_SCT found in metadata." )
}
sample_microglia_qc <- SO@ meta.data %>%
mutate (
is_microglia = SO$ is_microglia,
nFeature_use = .data[[feature_col]]
) %>%
group_by (
sample_use,
condition_use,
sex = case_when (
stringr:: str_detect (sample_use, "_F$" ) ~ "Female" ,
stringr:: str_detect (sample_use, "_M$" ) ~ "Male" ,
TRUE ~ "Unknown"
)
) %>%
summarise (
total_cells = n (),
microglia_cells = sum (is_microglia),
microglia_percent = 100 * microglia_cells / total_cells,
mean_gene_number = mean (nFeature_use, na.rm = TRUE ),
median_gene_number = median (nFeature_use, na.rm = TRUE ),
.groups = "drop"
) %>%
arrange (desc (microglia_percent))
write.csv (
sample_microglia_qc,
file.path (results_dir, "Step13A_sample_microglia_fraction_vs_gene_number.csv" ),
row.names = FALSE
)
Simple bar plot of microglia fraction per sample
Show code
plot_microglia_fraction <- function (sort_method) {
if (sort_method == "microglia_percent" ) {
sample_levels <- sample_microglia_qc %>% arrange (microglia_percent) %>% pull (sample_use)
} else if (sort_method == "microglia_count" ) {
sample_levels <- sample_microglia_qc %>% arrange (microglia_cells) %>% pull (sample_use)
} else {
# sample_name (coord_flipで上が若番になるよう逆順に並べる)
sample_levels <- sort (unique (sample_microglia_qc$ sample_use), decreasing = TRUE )
}
sample_microglia_qc_ordered <- sample_microglia_qc %>%
mutate (sample_use = factor (sample_use, levels = sample_levels))
sample_microglia_fraction_long <- sample_microglia_qc_ordered %>%
mutate (other_cells = total_cells - microglia_cells) %>%
select (
sample_use,
condition_use,
total_cells,
microglia_cells,
other_cells,
microglia_percent
) %>%
pivot_longer (
cols = c (microglia_cells, other_cells),
names_to = "cell_category" ,
values_to = "n_cells"
) %>%
mutate (
cell_category = case_when (
cell_category == "microglia_cells" ~ "Microglia" ,
cell_category == "other_cells" ~ "Other cells"
),
cell_category = factor (
cell_category,
levels = c ("Other cells" , "Microglia" )
)
)
p <- ggplot (
sample_microglia_fraction_long,
aes (
x = sample_use,
y = n_cells,
fill = cell_category
)
) +
geom_col (width = 0.75 ) +
geom_text (
data = sample_microglia_qc_ordered %>%
mutate (
label = sprintf (
"%.1f%% (%d / %d cells)" ,
microglia_percent,
microglia_cells,
total_cells
)
),
aes (
x = sample_use,
y = total_cells,
label = label
),
inherit.aes = FALSE ,
hjust = - 0.05 ,
size = 3.2
) +
coord_flip (clip = "off" ) +
scale_y_continuous (
expand = expansion (mult = c (0 , 0.35 ))
) +
labs (
title = "Microglia Fraction per Sample" ,
subtitle = "Each bar represents total cell count; label shows microglia fraction" ,
x = "Sample" ,
y = "Total cell count" ,
fill = "Cell category"
) +
theme_minimal () +
theme (
plot.margin = margin (5.5 , 80 , 5.5 , 5.5 )
)
return (p)
}
Show code
plot_microglia_fraction ("microglia_percent" )
Show code
plot_microglia_fraction ("microglia_count" )
Show code
plot_microglia_fraction ("sample_name" )
Sample-Level Microglia QC Table
Show code
DT:: datatable (
sample_microglia_qc,
filter = "top" ,
rownames = FALSE ,
options = list (pageLength = 10 , scrollX = TRUE )
)
Preprocess Microglia
PCA is re-computed within the microglia subset. This PCA is used as input for Harmony.
Show code
DefaultAssay (SO_micro) <- "RNA"
if ("Assay5" %in% class (SO_micro[["RNA" ]])) {
SO_micro <- JoinLayers (SO_micro, assay = "RNA" )
}
SO_micro <- NormalizeData (SO_micro, verbose = FALSE )
SO_micro <- FindVariableFeatures (
SO_micro,
selection.method = "vst" ,
nfeatures = 2000 ,
verbose = FALSE
)
SO_micro <- ScaleData (SO_micro, verbose = FALSE )
SO_micro <- RunPCA (
SO_micro,
npcs = 40 ,
reduction.name = "pca_micro" ,
reduction.key = "microPCA_" ,
verbose = FALSE
)
ElbowPlot (SO_micro, reduction = "pca_micro" , ndims = 40 ) +
labs (title = "Microglia PCA Elbow Plot" )
Run Harmony
Harmony is applied using sample identity as the grouping variable. Condition is not used as a Harmony variable because disease status is biologically relevant and should not be regressed out.
Show code
harmony_group_var <- "sample"
if (! harmony_group_var %in% colnames (SO_micro@ meta.data)) {
stop ("Harmony grouping variable not found in metadata: " , harmony_group_var)
}
SO_micro <- RunHarmony (
object = SO_micro,
group.by.vars = harmony_group_var,
reduction = "pca_micro" ,
reduction.save = "harmony_micro" ,
verbose = TRUE
)
Clustering and UMAP on Harmony Embeddings
Show code
set.seed (1234 )
dims_use_harmony <- 1 : 30
resolution_use_harmony <- 0.5
SO_micro <- FindNeighbors (
SO_micro,
reduction = "harmony_micro" ,
dims = dims_use_harmony,
graph.name = "micro_harmony_snn" ,
verbose = FALSE
)
SO_micro <- FindClusters (
SO_micro,
graph.name = "micro_harmony_snn" ,
resolution = resolution_use_harmony,
verbose = FALSE
)
SO_micro$ seurat_clusters_harmony <- as.character (SO_micro$ seurat_clusters)
SO_micro <- RunUMAP (
SO_micro,
reduction = "harmony_micro" ,
dims = dims_use_harmony,
reduction.name = "umap_micro_harmony" ,
reduction.key = "microHarmonyUMAP_" ,
verbose = FALSE
)
# Run UMAP on unintegrated PCA for comparison
SO_micro <- RunUMAP (
SO_micro,
reduction = "pca_micro" ,
dims = dims_use_harmony,
reduction.name = "umap_micro_unintegrated" ,
reduction.key = "microUnintegratedUMAP_" ,
verbose = FALSE
)
table (SO_micro$ seurat_clusters_harmony)
0 1 2 3 4 5
1523 1191 406 153 142 24
Microglia Cluster Composition per Sample
Show code
if (! "seurat_clusters_harmony" %in% colnames (SO_micro@ meta.data)) {
stop ("seurat_clusters_harmony not found. Run clustering first." )
}
cluster_fraction_by_sample <- SO_micro@ meta.data %>%
dplyr:: count (
sample,
condition,
sex,
seurat_clusters_harmony,
name = "n_cells"
) %>%
dplyr:: group_by (sample) %>%
dplyr:: mutate (
total_microglia = sum (n_cells),
cluster_percent = 100 * n_cells / total_microglia
) %>%
dplyr:: ungroup () %>%
dplyr:: mutate (
sample_label = paste0 (sample, " \n (N = " , total_microglia, ")" ),
percent_label = ifelse (
cluster_percent >= 5 ,
paste0 (round (cluster_percent, 1 ), "%" ),
""
),
hover_text = paste0 ("Cells in cluster: " , n_cells)
)
write.csv (
cluster_fraction_by_sample,
file.path (results_dir, "Step13A_cluster_composition_per_sample.csv" ),
row.names = FALSE
)
p_cluster_fraction <- ggplot (
cluster_fraction_by_sample,
aes (
x = sample_label,
y = cluster_percent,
fill = seurat_clusters_harmony,
text = hover_text
)
) +
geom_col (width = 0.75 , position = position_stack (reverse = TRUE )) +
geom_text (
aes (label = percent_label),
position = position_stack (vjust = 0.5 , reverse = TRUE ),
size = 3
) +
coord_flip () +
labs (
title = "Microglia Cluster Composition per Sample" ,
subtitle = "Each bar represents the composition of microglia subclusters within each sample" ,
x = NULL ,
y = "Percent of microglia cells" ,
fill = "Harmony cluster"
) +
theme_classic ()
plotly:: ggplotly (
p_cluster_fraction,
tooltip = "text"
)
Cluster Composition Data Table
Show code
DT:: datatable (
cluster_fraction_by_sample,
filter = "top" ,
rownames = FALSE ,
options = list (pageLength = 10 , scrollX = TRUE )
)
Visualise Clusters
Before and After Harmony Integration
Show code
p_before <- DimPlot (
SO_micro,
reduction = "umap_micro_unintegrated" ,
group.by = "sample_use" ,
shuffle = TRUE ,
pt.size = 0.4
) +
labs (title = "Before Harmony (PCA-based UMAP)" ) +
theme (legend.position = "none" )
p_after <- DimPlot (
SO_micro,
reduction = "umap_micro_harmony" ,
group.by = "sample_use" ,
shuffle = TRUE ,
pt.size = 0.4
) +
labs (title = "After Harmony" )
p_before | p_after
Cluster Annotations
Show code
p_harmony_cluster <- DimPlot (
SO_micro,
reduction = "umap_micro_harmony" ,
group.by = "seurat_clusters_harmony" ,
label = TRUE ,
repel = TRUE ,
pt.size = 0.5
) +
labs (title = "Harmony-Corrected Microglia Subclusters" )
p_harmony_condition <- DimPlot (
SO_micro,
reduction = "umap_micro_harmony" ,
group.by = "condition" ,
pt.size = 0.4
) +
labs (title = "Condition Distribution" )
p_harmony_sex <- DimPlot (
SO_micro,
reduction = "umap_micro_harmony" ,
group.by = "sex" ,
pt.size = 0.4
) +
labs (title = "Sex Distribution" )
p_harmony_sample <- DimPlot (
SO_micro,
reduction = "umap_micro_harmony" ,
group.by = "sample" ,
pt.size = 0.4
) +
labs (title = "Sample Distribution" )
p_split_condition <- DimPlot (
SO_micro,
reduction = "umap_micro_harmony" ,
group.by = "seurat_clusters_harmony" ,
split.by = "condition" ,
label = TRUE ,
repel = TRUE ,
pt.size = 0.35
) +
labs (title = "Clusters Split by Condition" )
p_split_sex <- DimPlot (
SO_micro,
reduction = "umap_micro_harmony" ,
group.by = "seurat_clusters_harmony" ,
split.by = "sex" ,
label = TRUE ,
repel = TRUE ,
pt.size = 0.35
) +
labs (title = "Clusters Split by Sex" )
p_split_sample <- DimPlot (
SO_micro,
reduction = "umap_micro_harmony" ,
group.by = "seurat_clusters_harmony" ,
split.by = "sample" ,
label = TRUE ,
repel = TRUE ,
pt.size = 0.25 ,
ncol = 4
) +
labs (title = "Clusters Split by Sample" )
Show code
p_harmony_cluster | p_harmony_condition
Show code
table (SO_micro$ condition)
Show code
Show code
p_harmony_cluster | p_harmony_sex
Show code
Female Male Unknown
1798 1641 0
Show code
Show code
p_harmony_cluster | p_harmony_sample
Show code
NOSN01_F NOSN02_F NOSN03_F NOSN04_M NOSN05_M NOSN06_M PDSN01_F PDSN02_F
357 120 412 628 202 29 120 182
PDSN04_F PDSN06_M PDSN07_M PDSN09_M
607 378 124 280
Show code