library(Seurat)
library(dplyr)
library(ggplot2)
# Catch the baton: load the clustered Seurat object (SO) written by the
# previous pipeline step.
import_path <- "/Users/yoshimurasouhei/Downloads/010_school/4年生/bioinfomaticsリサーチクラークシップ/PD_2026/scripts/SO_05_Clustered.rds"
SO <- readRDS(import_path)

# Step 10: Cell Type Annotation
Setup: Environment and Data
In this final step of the single-sample pipeline, we assign biological identities to our mathematical clusters and define the marker genes for these overarching cell types. First, we load the necessary libraries and the clustered dataset.
Step 10A: Annotating Clusters
We map the numeric clusters to their biological cell types based on the classical marker genes we reviewed in Step 9. Several clusters often merge into a single broad lineage (e.g., Clusters 1 and 4 are both labeled Astrocyte, and Clusters 14 and 15 are both labeled Neuron).
# Lookup table from numeric cluster ID (used as the element name) to the
# assigned cell-type label. Labels are listed in cluster order 0..18; the
# trailing comment on each line gives the cluster ID and the marker genes
# that motivated the call.
new.cluster.ids <- setNames(
  c(
    "Unassigned",             #  0: low logFC, mostly lncRNAs
    "Astrocyte",              #  1: S100B, GPD1
    "Oligodendrocyte",        #  2: OPALIN, CD9, PLLP
    "Unassigned",             #  3: low logFC, ADAMTS18 overlap
    "Astrocyte",              #  4: SLC14A1 (Ependymal-like)
    "Stressed Cell",          #  5: HSPA1A, HSPA1B, CRYAB
    "Microglia / Macrophage", #  6: SYK, CD86, MS4A6A
    "OPCs",                   #  7: PDGFRA, PCDH15
    "Unassigned",             #  8: extremely low logFC, ADAMTS18
    "Endothelial Cell",       #  9: CLDN5, EMCN, ABCB1
    "Inhibitory Neuron",      # 10: GAD2, EBF3, HOXB8
    "Fibroblast / Pericyte",  # 11: COL1A2, FOXD1, NOTCH3
    "Reactive Astrocyte",     # 12: CHI3L1, TNC
    "Unassigned Glial",       # 13: CYP4F12
    "Neuron",                 # 14: MCHR2, CBLN2
    "Neuron",                 # 15: ST8SIA2
    "Excitatory Neuron",      # 16: NEUROD1, GRM4, UNC13C
    "T Cell / NK Cell",       # 17: CD3E, CD2, IL7R, GZMA
    "Fibroblast"              # 18: COL6A3, CEMIP
  ),
  as.character(0:18)
)
# Apply the biological names to the Seurat object: every numeric cluster
# identity is replaced by its label from new.cluster.ids, so clusters that
# share a label collapse into one identity class.
SO <- Seurat::RenameIdents(SO, new.cluster.ids)

# UMAP plot to visually confirm the annotation. Cell-type labels are drawn
# on the plot (repelled to reduce overlap), so the legend is dropped.
clusters_annot <- Seurat::DimPlot(
  SO,
  reduction = "umap",
  label = TRUE,
  repel = TRUE,
  pt.size = 0.5
) +
  Seurat::NoLegend()
clusters_annot

# Step 10B: Characterizing Final Cell Types
Because we have merged multiple sub-clusters, we perform differential expression analysis one final time to identify the core marker genes that define these broad cell type categories.
# 1. Find marker genes for each annotated cell type.
#    only.pos = TRUE keeps positive markers only; min.pct = 0.25 is the
#    minimum detection fraction passed to Seurat's marker test.
markers <- Seurat::FindAllMarkers(
  SO,
  only.pos = TRUE,
  min.pct = 0.25,
  verbose = FALSE
)
# Fail early with a clear message instead of an obscure error further down.
if (nrow(markers) == 0) {
  stop("FindAllMarkers() returned no marker genes.", call. = FALSE)
}

# 2. Translate Ensembl IDs to gene symbols for readability.
library(AnnotationDbi)
library(org.Hs.eg.db)
# multiVals = "first" keeps one symbol per ID; unname() stops the key names
# from being carried into the data-frame column.
gene_symbols <- unname(
  AnnotationDbi::mapIds(
    org.Hs.eg.db,
    keys = markers$gene,
    column = "SYMBOL",
    keytype = "ENSEMBL",
    multiVals = "first"
  )
)
# Fall back to the original ID wherever no symbol could be found.
markers$symbol <- ifelse(is.na(gene_symbols), markers$gene, gene_symbols)

# 3. For each cell type, keep the top 5 genes by average log2 fold change.
#    slice_max() keeps ties by default, so a group may return more than 5
#    rows. ungroup() so later steps do not inherit the grouping.
top_fc <- markers |>
  dplyr::group_by(cluster) |>
  dplyr::slice_max(order_by = avg_log2FC, n = 5) |>
  dplyr::ungroup()
# 4. Downsample to prevent memory crashes during heatmap generation
#    (downsample = 500 caps the number of cells kept per identity class).
SO_subset <- subset(SO, downsample = 500)

# 5. Create the heatmap.
#    DoHeatmap() looks features up by the object's feature names. The
#    ENSEMBL-keyed lookup on markers$gene implies those names are Ensembl
#    IDs, so the IDs (not the symbols) are passed as `features`; the symbols
#    are used only to relabel the y axis.
heatmap_genes <- unique(top_fc$gene)
symbol_lookup <- setNames(top_fc$symbol, top_fc$gene)
Seurat::DoHeatmap(
  SO_subset,
  features = heatmap_genes,
  label = TRUE,
  angle = 45
) +
  # A gene can be a top marker for several cell types; name-based indexing
  # returns its first entry, which is the same symbol in every case.
  ggplot2::scale_y_discrete(
    labels = function(ids) unname(symbol_lookup[ids])
  ) +
  Seurat::NoLegend()

# Final Save: Single Sample Pipeline Complete
# Define the final save path for the annotated object.
save_path <- "/Users/yoshimurasouhei/Downloads/010_school/4年生/bioinfomaticsリサーチクラークシップ/PD_2026/scripts/SO_07_Annotated.RDS"
# Save the final annotated Seurat object so later steps can reload it.
saveRDS(object = SO, file = save_path)
print("Analysis Complete! Final object saved successfully.")
#> [1] "Analysis Complete! Final object saved successfully."