Setup: Environment and Data
This file starts from the microglia object generated in Step 13 .
The input object should already contain:
microglia cells
microglia subclusters
homeostatic / DAM / inflammatory / phagocytic / interferon response module scores
condition information
The main biological goal is to identify lncRNAs associated with a cluster-defined microglia state transition:
Homeostatic microglia
→ activated / disease-associated microglia (DAM)
This is a microglia state analysis , not a broad cell-type discovery analysis.
Show code
library (Seurat)
library (tidyverse)
library (patchwork)
set.seed (1234 )
project_dir <- "/Users/yoshimurasouhei/Downloads/010_school/4年生/bioinfomaticsリサーチクラークシップ/PD_2026"
input_path <- file.path (
project_dir,
"results" ,
"Step13_microglia_harmony_reclustering" ,
"SO_micro_step13_fernando_harmony_reclustered_scored.rds"
)
results_dir <- file.path (
project_dir,
"results" ,
"Step14_microglia_lncRNA_exploration"
)
dir.create (results_dir, recursive = TRUE , showWarnings = FALSE )
SO_micro <- readRDS (input_path)
DefaultAssay (SO_micro) <- "RNA"
if ("Assay5" %in% class (SO_micro[["RNA" ]])) {
SO_micro <- JoinLayers (SO_micro, assay = "RNA" )
}
SO_micro
An object of class Seurat
87674 features across 3439 samples within 2 assays
Active assay: RNA (47773 features, 2000 variable features)
3 layers present: counts, data, scale.data
1 other assay present: SCT
7 dimensional reductions calculated: pca, umap, harmony, pca_micro, harmony_micro, umap_micro_harmony, umap_micro_unintegrated
Show code
required_cols <- c (
"seurat_clusters_harmony" ,
"condition" ,
"sample" ,
"sex" ,
"Micro_homeostatic_score1" ,
"DAM_score1" ,
"Inflammatory_score1" ,
"Phagocytic_score1" ,
"Interferon_response_score1" ,
"umap_micro_harmony"
)
missing_cols <- setdiff (required_cols, c (colnames (SO_micro@ meta.data), Reductions (SO_micro)))
if (length (missing_cols) > 0 ) {
stop (
paste (
"Missing required Step 13 outputs:" ,
paste (missing_cols, collapse = ", " )
)
)
}
table (SO_micro$ seurat_clusters_harmony)
0 1 2 3 4 5
1523 1191 406 153 142 24
Show code
table (SO_micro$ condition)
Show code
stopifnot ("umap_micro_harmony" %in% Reductions (SO_micro))
stopifnot ("seurat_clusters_harmony" %in% colnames (SO_micro@ meta.data))
stopifnot (! any (is.na (SO_micro$ seurat_clusters_harmony)))
Step 14.1: Refine Microglia State Annotation
Here, the main state definition is cluster-based , not quartile-based.
Based on Step 13 cluster summaries, you should define your main groups:
DAM:
[update clusters]
Homeostatic:
[update clusters]
Other / excluded from main binary DE:
[update clusters]
Clusters excluded from the main binary DE comparison might represent background-stress-like or minor/intermediate states rather than the two extreme states used for candidate prioritization.
The combined activation score is calculated only for visualization and summary. It is not used to define the main DE groups.
Show code
SO_micro$ micro_cluster <- as.character (SO_micro$ seurat_clusters_harmony)
dam_clusters <- c ("1" )
homeostatic_clusters <- c ("0" )
other_clusters <- c ("2" , "3" , "4" , "5" )
if (length (c (dam_clusters, homeostatic_clusters, other_clusters)) == 0 ) {
message ("Please define DAM, Homeostatic, and Other clusters based on Step 13." )
SO_micro$ micro_state_refined <- "Unassigned"
} else {
SO_micro$ micro_state_refined <- case_when (
SO_micro$ micro_cluster %in% dam_clusters ~ "DAM" ,
SO_micro$ micro_cluster %in% homeostatic_clusters ~ "Homeostatic" ,
SO_micro$ micro_cluster %in% other_clusters ~ "Other" ,
TRUE ~ "Unassigned"
)
}
SO_micro$ micro_state_refined <- factor (
SO_micro$ micro_state_refined,
levels = c ("DAM" , "Homeostatic" , "Other" , "Unassigned" )
)
table (SO_micro$ micro_cluster, SO_micro$ micro_state_refined)
DAM Homeostatic Other Unassigned
0 0 1523 0 0
1 1191 0 0 0
2 0 0 406 0
3 0 0 153 0
4 0 0 142 0
5 0 0 24 0
Show code
table (SO_micro$ micro_state_refined, SO_micro$ condition)
C PD
DAM 565 626
Homeostatic 745 778
Other 438 287
Unassigned 0 0
Show code
activation_score_matrix <- SO_micro@ meta.data[, c (
"DAM_score1" ,
"Inflammatory_score1" ,
"Phagocytic_score1" ,
"Interferon_response_score1"
)]
SO_micro$ combined_activation_score_z <- rowMeans (
scale (activation_score_matrix),
na.rm = TRUE
)
summary (SO_micro$ combined_activation_score_z)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-1.4047 -0.5178 -0.1057 0.0000 0.4396 3.2179
Show code
micro_state_cluster_summary <- SO_micro@ meta.data %>%
mutate (
micro_cluster = as.character (seurat_clusters_harmony),
micro_state_refined = as.character (micro_state_refined)
) %>%
group_by (micro_cluster, micro_state_refined) %>%
summarise (
n_cells = n (),
mean_homeostatic = mean (Micro_homeostatic_score1, na.rm = TRUE ),
mean_combined_activation_z = mean (combined_activation_score_z, na.rm = TRUE ),
mean_dam = mean (DAM_score1, na.rm = TRUE ),
mean_inflammatory = mean (Inflammatory_score1, na.rm = TRUE ),
mean_phagocytic = mean (Phagocytic_score1, na.rm = TRUE ),
mean_interferon = mean (Interferon_response_score1, na.rm = TRUE ),
percent_PD = mean (condition == "PD" , na.rm = TRUE ),
percent_C = mean (condition == "C" , na.rm = TRUE ),
.groups = "drop"
) %>%
arrange (desc (mean_combined_activation_z))
DT:: datatable (
micro_state_cluster_summary,
rownames = FALSE ,
filter = "top" ,
options = list (
pageLength = 10 ,
autoWidth = TRUE ,
scrollX = TRUE
)
)
Show code
micro_state_group_summary <- SO_micro@ meta.data %>%
mutate (
micro_cluster = as.character (seurat_clusters_harmony),
micro_state_refined = as.character (micro_state_refined)
) %>%
group_by (micro_state_refined) %>%
summarise (
n_cells = n (),
mean_homeostatic = mean (Micro_homeostatic_score1, na.rm = TRUE ),
mean_combined_activation_z = mean (combined_activation_score_z, na.rm = TRUE ),
mean_dam = mean (DAM_score1, na.rm = TRUE ),
mean_inflammatory = mean (Inflammatory_score1, na.rm = TRUE ),
mean_phagocytic = mean (Phagocytic_score1, na.rm = TRUE ),
mean_interferon = mean (Interferon_response_score1, na.rm = TRUE ),
percent_PD = mean (condition == "PD" , na.rm = TRUE ),
percent_C = mean (condition == "C" , na.rm = TRUE ),
clusters_included = paste (sort (unique (micro_cluster)), collapse = ", " ),
.groups = "drop"
) %>%
arrange (desc (mean_combined_activation_z))
DT:: datatable (
micro_state_group_summary,
rownames = FALSE ,
filter = "top" ,
options = list (
pageLength = 10 ,
autoWidth = TRUE ,
scrollX = TRUE
)
)
Show code
p_state <- DimPlot (
SO_micro,
reduction = "umap_micro_harmony" ,
group.by = "micro_state_refined" ,
label = TRUE ,
repel = TRUE ,
pt.size = 0.5
) +
labs (title = "Cluster-Based Refined Microglia State Annotation" )
p_score <- FeaturePlot (
SO_micro,
features = "combined_activation_score_z" ,
reduction = "umap_micro_harmony" ,
pt.size = 0.5
) +
labs (title = "Z-scored Combined Microglia Activation Score" )
p_state | p_score
Show code
SO_micro$ micro_DE_group <- case_when (
SO_micro$ micro_state_refined == "DAM" ~ "DAM" ,
SO_micro$ micro_state_refined == "Homeostatic" ~ "Homeostatic" ,
TRUE ~ NA_character_
)
SO_micro$ micro_DE_group <- factor (
SO_micro$ micro_DE_group,
levels = c ("Homeostatic" , "DAM" )
)
table (SO_micro$ micro_DE_group, useNA = "ifany" )
Homeostatic DAM <NA>
1523 1191 725
Show code
table (SO_micro$ micro_DE_group, SO_micro$ condition, useNA = "ifany" )
C PD
Homeostatic 745 778
DAM 565 626
<NA> 438 287
Show code
write.csv (
micro_state_cluster_summary,
file.path (results_dir, "Step14_1_refined_microglia_state_cluster_summary.csv" ),
row.names = FALSE
)
write.csv (
micro_state_group_summary,
file.path (results_dir, "Step14_1_refined_microglia_state_group_summary.csv" ),
row.names = FALSE
)
write.csv (
as.data.frame.matrix (table (SO_micro$ micro_state_refined, SO_micro$ condition)),
file.path (results_dir, "Step14_1_refined_microglia_state_condition_table.csv" )
)
write.csv (
as.data.frame.matrix (table (SO_micro$ micro_cluster, SO_micro$ micro_state_refined)),
file.path (results_dir, "Step14_1_cluster_to_refined_state_table.csv" )
)
Step 14.2: Define Comparison Groups for Differential Expression
We define two main comparisons:
1. DAM vs Homeostatic
2. PD vs C within microglia
The first comparison prioritizes cluster-defined microglia state-associated genes.
The second comparison prioritizes disease-associated genes across all microglia.
Show code
SO_micro_main_DE <- subset (
SO_micro,
subset = ! is.na (micro_DE_group)
)
SO_micro_main_DE$ micro_DE_group <- droplevels (SO_micro_main_DE$ micro_DE_group)
table (SO_micro_main_DE$ micro_DE_group)
Homeostatic DAM
1523 1191
Show code
table (SO_micro_main_DE$ micro_DE_group, SO_micro_main_DE$ condition)
C PD
Homeostatic 745 778
DAM 565 626
Show code
min_pct_use <- 0.05
logfc_threshold_use <- 0.10
p_adj_cutoff <- 0.05
lncRNA_logfc_cutoff <- 0.25
min_pct_use
Show code
Show code
Show code
Step 14.3: Differential Expression Analysis
We run DE for:
DAM vs Homeostatic
PD vs C
Show code
if (length (unique (SO_micro_main_DE$ micro_DE_group)) == 2 ) {
Idents (SO_micro_main_DE) <- SO_micro_main_DE$ micro_DE_group
micro_dam_vs_homeostatic_DE <- FindMarkers (
SO_micro_main_DE,
ident.1 = "DAM" ,
ident.2 = "Homeostatic" ,
min.pct = min_pct_use,
logfc.threshold = logfc_threshold_use,
test.use = "wilcox"
) %>%
rownames_to_column ("gene" ) %>%
arrange (p_val_adj, desc (avg_log2FC))
print (micro_dam_vs_homeostatic_DE %>% slice_head (n = 20 ))
} else {
message ("Not enough groups for DAM vs Homeostatic DE. Make sure to define clusters in Step 14.1." )
micro_dam_vs_homeostatic_DE <- data.frame (gene = character ())
}
gene p_val avg_log2FC pct.1 pct.2 p_val_adj
1 FTL 1.095949e-228 2.454705 0.942 0.512 5.235677e-224
2 RPS19 4.141249e-212 2.193070 0.898 0.387 1.978399e-207
3 APOE 3.479507e-195 2.364915 0.864 0.389 1.662265e-190
4 RPL11 8.837063e-193 2.673218 0.771 0.225 4.221730e-188
5 RPL13 8.781991e-187 2.205156 0.872 0.406 4.195421e-182
6 TPT1 3.702848e-185 2.132740 0.879 0.422 1.768962e-180
7 RPL19 1.034972e-182 2.498880 0.767 0.222 4.944371e-178
8 RPS23 1.465859e-179 2.810964 0.691 0.156 7.002849e-175
9 EEF1A1 2.251186e-179 1.896427 0.890 0.446 1.075459e-174
10 RPL32 4.398240e-179 2.758371 0.709 0.177 2.101171e-174
11 RPLP1 9.508172e-177 2.145385 0.853 0.362 4.542339e-172
12 CD74 8.270541e-175 1.716980 0.930 0.531 3.951086e-170
13 RPS27 2.493567e-172 2.938598 0.679 0.160 1.191252e-167
14 C1QC 4.047387e-172 2.431164 0.748 0.242 1.933558e-167
15 HLA-DRA 1.246920e-171 2.479459 0.743 0.233 5.956910e-167
16 TMSB4X 8.999113e-171 1.903514 0.888 0.445 4.299146e-166
17 RPS2 2.917229e-170 2.512467 0.714 0.191 1.393648e-165
18 B2M 1.305739e-168 2.181601 0.793 0.303 6.237909e-164
19 RPL7A 1.806646e-164 2.573908 0.672 0.160 8.630890e-160
20 RPS28 1.256351e-163 2.047980 0.816 0.323 6.001966e-159
Show code
Idents (SO_micro) <- SO_micro$ condition
micro_PD_vs_C_DE <- FindMarkers (
SO_micro,
ident.1 = "PD" ,
ident.2 = "C" ,
min.pct = min_pct_use,
logfc.threshold = logfc_threshold_use,
test.use = "wilcox"
) %>%
rownames_to_column ("gene" ) %>%
arrange (p_val_adj, desc (avg_log2FC))
DT:: datatable (
micro_PD_vs_C_DE,
rownames = FALSE ,
filter = "top" ,
options = list (
pageLength = 10 ,
autoWidth = TRUE ,
scrollX = TRUE
)
)
Step 14.4: Cluster-Specific DE Analysis
Here we extract markers for each microglia subcluster.
These are subcluster/state markers , not broad microglia identity markers.
Show code
Idents (SO_micro) <- SO_micro$ seurat_clusters_harmony
micro_cluster_markers <- FindAllMarkers (
SO_micro,
only.pos = TRUE ,
min.pct = min_pct_use,
logfc.threshold = logfc_threshold_use,
test.use = "wilcox"
) %>%
arrange (cluster, p_val_adj, desc (avg_log2FC))
micro_cluster_markers %>% head (30 )
Show code
top_micro_cluster_markers <- micro_cluster_markers %>%
group_by (cluster) %>%
slice_min (order_by = p_val_adj, n = 20 , with_ties = FALSE ) %>%
ungroup ()
top_micro_cluster_markers
Step 14.5 / 14.6: Annotate DE Results with lncRNA Context
Show code
lncRNA_annotation_path <- file.path (
project_dir,
"results" ,
"Step13_microglia_harmony_reclustering" ,
"Step13_microglia_lncRNA_gene_annotation_context_based.csv"
)
if (! file.exists (lncRNA_annotation_path)) {
stop (
paste (
"Step 13 context-based lncRNA annotation file not found:" ,
lncRNA_annotation_path
)
)
}
lncRNA_gene_annotation <- read.csv (
lncRNA_annotation_path,
stringsAsFactors = FALSE
) %>%
as_tibble ()
required_lncRNA_anno_cols <- c (
"gene" ,
"gencode_gene_id" ,
"gencode_gene_name" ,
"lncRNA_type" ,
"is_lncRNA"
)
missing_lncRNA_anno_cols <- setdiff (
required_lncRNA_anno_cols,
colnames (lncRNA_gene_annotation)
)
if (length (missing_lncRNA_anno_cols) > 0 ) {
stop (
paste (
"Missing columns in Step 13 lncRNA annotation:" ,
paste (missing_lncRNA_anno_cols, collapse = ", " )
)
)
}
lncRNA_gene_annotation <- lncRNA_gene_annotation %>%
dplyr:: select (
gene,
gencode_gene_id,
gencode_gene_name,
lncRNA_type,
is_lncRNA
) %>%
distinct (gene, .keep_all = TRUE )
lncRNA_gene_annotation %>%
count (lncRNA_type, name = "n_genes" ) %>%
arrange (desc (n_genes))
Show code
annotate_de_table_with_lncRNA_context <- function (de_table) {
if (nrow (de_table) == 0 ) return (de_table)
de_table %>%
left_join (
lncRNA_gene_annotation,
by = "gene"
) %>%
mutate (
is_lncRNA = if_else (is.na (is_lncRNA), FALSE , is_lncRNA),
lncRNA_type = if_else (
is.na (lncRNA_type),
"Not_lncRNA_or_unmatched" ,
lncRNA_type
),
gene_symbol_for_plot = case_when (
! is.na (gencode_gene_name) ~ gencode_gene_name,
TRUE ~ gene
)
)
}
micro_dam_vs_homeostatic_annotated <- annotate_de_table_with_lncRNA_context (
micro_dam_vs_homeostatic_DE
)
micro_PD_vs_C_annotated <- annotate_de_table_with_lncRNA_context (
micro_PD_vs_C_DE
)
micro_cluster_markers_annotated <- annotate_de_table_with_lncRNA_context (
micro_cluster_markers
)
Step 14.7: Extract Annotated lncRNA Candidates
We extract lncRNA-like candidates from each comparison.
Show code
if (nrow (micro_dam_vs_homeostatic_annotated) > 0 ) {
dam_lncRNA_candidates <- micro_dam_vs_homeostatic_annotated %>%
filter (
is_lncRNA,
p_val_adj < p_adj_cutoff,
abs (avg_log2FC) >= lncRNA_logfc_cutoff
) %>%
arrange (p_val_adj, desc (abs (avg_log2FC)))
} else {
dam_lncRNA_candidates <- data.frame (gene = character ())
}
PD_lncRNA_candidates <- micro_PD_vs_C_annotated %>%
filter (
is_lncRNA,
p_val_adj < p_adj_cutoff,
abs (avg_log2FC) >= lncRNA_logfc_cutoff
) %>%
arrange (p_val_adj, desc (abs (avg_log2FC)))
cluster_lncRNA_candidates <- micro_cluster_markers_annotated %>%
filter (
is_lncRNA,
p_val_adj < p_adj_cutoff,
abs (avg_log2FC) >= lncRNA_logfc_cutoff
) %>%
arrange (cluster, p_val_adj, desc (avg_log2FC))
if (nrow (dam_lncRNA_candidates) > 0 ) {
print (dam_lncRNA_candidates %>%
dplyr:: select (gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1 , pct.2 , p_val_adj) %>%
slice_head (n = 30 ))
}
gene gene_symbol_for_plot lncRNA_type avg_log2FC pct.1 pct.2
1 MALAT1 MALAT1 LINC 0.6714575 1.000 1.000
2 ENSG00000296425 ENSG00000296425 Antisense 1.8225975 0.630 0.225
3 ADAM7-AS1 ADAM7-AS1 Antisense 1.8091683 0.439 0.128
4 TALAM1 TALAM1 LINC 1.5954060 0.351 0.112
5 HLA-DRB6 HLA-DRB6 LINC 2.2677500 0.189 0.032
6 NEAT1 NEAT1 LINC -0.6334787 0.898 0.915
7 SNHG29 SNHG29 Antisense 1.1624198 0.296 0.104
8 ENSG00000309976 ENSG00000309976 Antisense 1.6934093 0.242 0.076
9 SNHG6 SNHG6 Antisense 1.2683480 0.262 0.092
10 SNHG8 SNHG8 Antisense 1.9131695 0.167 0.035
11 ENSG00000286618 ENSG00000286618 Antisense 1.8047126 0.231 0.079
12 XIST XIST LINC -1.2976366 0.306 0.457
13 ENSG00000265987 ENSG00000265987 Antisense 1.9337850 0.160 0.039
14 ENSG00000228655 ENSG00000228655 Antisense 1.6087027 0.187 0.056
15 ZFAS1 ZFAS1 Antisense 1.1553797 0.223 0.083
16 TBC1D22A-DT TBC1D22A-DT LINC 3.0025319 0.100 0.015
17 ENSG00000297005 ENSG00000297005 Antisense 1.2346967 0.202 0.074
18 ENSG00000288853 ENSG00000288853 Antisense 1.4891391 0.154 0.046
19 ABHD15-AS1 ABHD15-AS1 Antisense 1.0700029 0.235 0.098
20 ENSG00000307379 ENSG00000307379 Antisense 1.4734386 0.175 0.060
21 ENSG00000302619 ENSG00000302619 LINC -1.2335708 0.221 0.356
22 ENSG00000307107 ENSG00000307107 Antisense 1.5725877 0.134 0.040
23 ENSG00000286145 ENSG00000286145 Antisense 1.6920963 0.106 0.024
24 ENSG00000300996 ENSG00000300996 Antisense 1.9160502 0.105 0.024
25 ENSG00000296449 ENSG00000296449 Antisense 2.5243396 0.084 0.014
26 ENSG00000274441 ENSG00000274441 Antisense 1.2741147 0.132 0.041
27 GAS5 GAS5 Antisense 1.0910310 0.183 0.075
28 ZFHX3-AS1 ZFHX3-AS1 Antisense 1.5964842 0.107 0.028
29 ENSG00000250362 ENSG00000250362 Antisense 1.5848590 0.118 0.035
30 AKAP13-AS1 AKAP13-AS1 Antisense 1.7004182 0.089 0.019
p_val_adj
1 4.209140e-98
2 4.258754e-98
3 2.558766e-67
4 4.394103e-45
5 7.580729e-36
6 2.559256e-31
7 1.547894e-29
8 2.700286e-28
9 2.406287e-26
10 2.563382e-26
11 8.059606e-24
12 1.692881e-23
13 3.104934e-22
14 2.473728e-21
15 1.028157e-18
16 4.071630e-18
17 2.068147e-17
18 9.380344e-17
19 9.421254e-17
20 1.746406e-16
21 5.141314e-14
22 5.506287e-14
23 5.739029e-14
24 1.036397e-13
25 2.536082e-13
26 1.325480e-12
27 2.289196e-12
28 3.267779e-12
29 3.748365e-12
30 5.865161e-12
Show code
PD_lncRNA_candidates %>%
dplyr:: select (gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1 , pct.2 , p_val_adj) %>%
slice_head (n = 30 )
Show code
cluster_lncRNA_candidates %>%
dplyr:: select (cluster, gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1 , pct.2 , p_val_adj) %>%
slice_head (n = 30 )
Step 14.8: Prioritize lncRNAs Shared Across Comparisons
The strongest candidates are lncRNAs associated with both the activated/DAM state and PD status.
Show code
if (nrow (dam_lncRNA_candidates) > 0 ) {
priority_micro_lncRNAs <- dam_lncRNA_candidates %>%
dplyr:: select (
gene,
gene_symbol_for_plot,
gencode_gene_id,
gencode_gene_name,
lncRNA_type,
avg_log2FC_dam_vs_homeostatic = avg_log2FC,
p_val_adj_dam_vs_homeostatic = p_val_adj,
pct.1_dam = pct.1 ,
pct.2_homeostatic = pct.2
) %>%
inner_join (
PD_lncRNA_candidates %>%
dplyr:: select (
gene,
avg_log2FC_PD_vs_C = avg_log2FC,
p_val_adj_PD_vs_C = p_val_adj,
pct.1_PD = pct.1 ,
pct.2_C = pct.2
),
by = "gene"
) %>%
mutate (
same_direction = sign (avg_log2FC_dam_vs_homeostatic) == sign (avg_log2FC_PD_vs_C),
priority_score =
- log10 (p_val_adj_dam_vs_homeostatic + 1e-300 ) +
- log10 (p_val_adj_PD_vs_C + 1e-300 ) +
abs (avg_log2FC_dam_vs_homeostatic) +
abs (avg_log2FC_PD_vs_C)
) %>%
arrange (desc (same_direction), desc (priority_score))
print (priority_micro_lncRNAs)
} else {
priority_micro_lncRNAs <- data.frame (gene = character ())
message ("DAM clusters not defined, so priority shared lncRNAs cannot be calculated." )
}
gene gene_symbol_for_plot gencode_gene_id gencode_gene_name
1 DLEU1 DLEU1 ENSG00000176124.16 DLEU1
2 ENSG00000287616 ENSG00000287616 ENSG00000287616.2 ENSG00000287616
3 LINC02712 LINC02712 ENSG00000273409.4 LINC02712
4 XACT XACT ENSG00000241743.5 XACT
5 MIR223HG MIR223HG ENSG00000274536.10 MIR223HG
6 FCGR1BP FCGR1BP ENSG00000291135.2 FCGR1BP
7 ENSG00000290735 ENSG00000290735 ENSG00000290735.2 ENSG00000290735
8 LINC02397 LINC02397 ENSG00000205056.9 LINC02397
9 ENSG00000308579 ENSG00000308579 ENSG00000308579.1 ENSG00000308579
10 SNHG15 SNHG15 ENSG00000232956.11 SNHG15
11 ADAM7-AS1 ADAM7-AS1 ENSG00000253535.6 ADAM7-AS1
12 NEAT1 NEAT1 ENSG00000245532.13 NEAT1
13 ENSG00000302619 ENSG00000302619 ENSG00000302619.1 ENSG00000302619
14 LNCAROD LNCAROD ENSG00000231131.11 LNCAROD
15 LINC02798 LINC02798 ENSG00000227082.4 LINC02798
16 RNASEH2B-AS1 RNASEH2B-AS1 ENSG00000233672.10 RNASEH2B-AS1
17 LINC01678 LINC01678 ENSG00000225331.3 LINC01678
18 ENSG00000289899 ENSG00000289899 ENSG00000289899.2 ENSG00000289899
lncRNA_type avg_log2FC_dam_vs_homeostatic p_val_adj_dam_vs_homeostatic
1 Antisense -0.6635837 1.847872e-10
2 LINC -0.8497620 2.345796e-03
3 LINC -1.5901997 1.455275e-07
4 LINC -1.6214382 4.110563e-06
5 LINC 0.3735245 4.587693e-03
6 LINC 0.6726387 1.307758e-04
7 Sense_overlapping 1.0989958 4.037941e-04
8 LINC 1.1166999 2.246377e-03
9 LINC 1.1730522 1.750759e-02
10 LINC 1.1985005 3.286326e-04
11 Antisense 1.8091683 2.558766e-67
12 LINC -0.6334787 2.559256e-31
13 LINC -1.2335708 5.141314e-14
14 LINC -0.6106552 3.566862e-06
15 LINC -1.0441560 9.584445e-10
16 Antisense 0.8093342 2.578993e-02
17 Antisense 0.5889772 5.210789e-05
18 LINC 1.2162408 3.067046e-02
pct.1_dam pct.2_homeostatic avg_log2FC_PD_vs_C p_val_adj_PD_vs_C pct.1_PD
1 0.698 0.692 -0.6094611 1.680843e-17 0.640
2 0.303 0.369 -1.1227391 3.771405e-21 0.257
3 0.123 0.214 -1.6596257 1.126833e-14 0.106
4 0.160 0.246 -0.9300041 8.087780e-10 0.151
5 0.280 0.185 1.0962312 1.622925e-11 0.273
6 0.160 0.083 1.1254378 3.850750e-07 0.144
7 0.094 0.039 1.4897168 4.384791e-06 0.083
8 0.077 0.030 1.4963353 3.326432e-05 0.066
9 0.068 0.027 1.3172593 7.303820e-04 0.065
10 0.071 0.024 1.1284589 2.842268e-02 0.062
11 0.439 0.128 -0.9963488 4.951895e-06 0.213
12 0.898 0.915 0.5400884 1.854489e-23 0.936
13 0.221 0.356 0.3912819 3.443363e-13 0.348
14 0.636 0.659 0.6367654 2.886327e-20 0.704
15 0.411 0.483 0.2786523 1.818758e-02 0.484
16 0.071 0.030 -2.0492466 9.704099e-06 0.024
17 0.112 0.047 -1.4055090 2.841494e-03 0.050
18 0.051 0.017 -2.4140107 5.314744e-04 0.017
pct.2_C same_direction priority_score
1 0.732 TRUE 27.780846
2 0.406 TRUE 25.025708
3 0.212 TRUE 24.035021
4 0.252 TRUE 17.029712
5 0.165 TRUE 14.597863
6 0.073 TRUE 12.096004
7 0.032 TRUE 11.340604
8 0.023 TRUE 9.739574
9 0.025 TRUE 7.383535
10 0.027 TRUE 7.356584
11 0.292 FALSE 74.702715
12 0.894 FALSE 54.497229
13 0.203 FALSE 27.376796
14 0.558 FALSE 26.234789
15 0.384 FALSE 12.081466
16 0.070 FALSE 9.460175
17 0.097 FALSE 8.824036
18 0.053 FALSE 8.418049
Show code
if (nrow (priority_micro_lncRNAs) > 0 ) {
cluster_lncRNA_support <- cluster_lncRNA_candidates %>%
group_by (gene) %>%
summarise (
cluster_support = paste (sort (unique (cluster)), collapse = ";" ),
max_cluster_log2FC = max (avg_log2FC, na.rm = TRUE ),
min_cluster_p_val_adj = min (p_val_adj, na.rm = TRUE ),
.groups = "drop"
)
priority_micro_lncRNAs <- priority_micro_lncRNAs %>%
left_join (cluster_lncRNA_support, by = "gene" ) %>%
arrange (desc (same_direction), desc (priority_score))
print (
priority_micro_lncRNAs %>%
dplyr:: select (
gene,
gene_symbol_for_plot,
avg_log2FC_dam_vs_homeostatic,
avg_log2FC_PD_vs_C,
p_val_adj_dam_vs_homeostatic,
p_val_adj_PD_vs_C,
same_direction,
priority_score,
cluster_support
) %>%
slice_head (n = 30 )
)
}
gene gene_symbol_for_plot avg_log2FC_dam_vs_homeostatic
1 DLEU1 DLEU1 -0.6635837
2 ENSG00000287616 ENSG00000287616 -0.8497620
3 LINC02712 LINC02712 -1.5901997
4 XACT XACT -1.6214382
5 MIR223HG MIR223HG 0.3735245
6 FCGR1BP FCGR1BP 0.6726387
7 ENSG00000290735 ENSG00000290735 1.0989958
8 LINC02397 LINC02397 1.1166999
9 ENSG00000308579 ENSG00000308579 1.1730522
10 SNHG15 SNHG15 1.1985005
11 ADAM7-AS1 ADAM7-AS1 1.8091683
12 NEAT1 NEAT1 -0.6334787
13 ENSG00000302619 ENSG00000302619 -1.2335708
14 LNCAROD LNCAROD -0.6106552
15 LINC02798 LINC02798 -1.0441560
16 RNASEH2B-AS1 RNASEH2B-AS1 0.8093342
17 LINC01678 LINC01678 0.5889772
18 ENSG00000289899 ENSG00000289899 1.2162408
avg_log2FC_PD_vs_C p_val_adj_dam_vs_homeostatic p_val_adj_PD_vs_C
1 -0.6094611 1.847872e-10 1.680843e-17
2 -1.1227391 2.345796e-03 3.771405e-21
3 -1.6596257 1.455275e-07 1.126833e-14
4 -0.9300041 4.110563e-06 8.087780e-10
5 1.0962312 4.587693e-03 1.622925e-11
6 1.1254378 1.307758e-04 3.850750e-07
7 1.4897168 4.037941e-04 4.384791e-06
8 1.4963353 2.246377e-03 3.326432e-05
9 1.3172593 1.750759e-02 7.303820e-04
10 1.1284589 3.286326e-04 2.842268e-02
11 -0.9963488 2.558766e-67 4.951895e-06
12 0.5400884 2.559256e-31 1.854489e-23
13 0.3912819 5.141314e-14 3.443363e-13
14 0.6367654 3.566862e-06 2.886327e-20
15 0.2786523 9.584445e-10 1.818758e-02
16 -2.0492466 2.578993e-02 9.704099e-06
17 -1.4055090 5.210789e-05 2.841494e-03
18 -2.4140107 3.067046e-02 5.314744e-04
same_direction priority_score cluster_support
1 TRUE 27.780846 0
2 TRUE 25.025708 0
3 TRUE 24.035021 0
4 TRUE 17.029712 0
5 TRUE 14.597863 1
6 TRUE 12.096004 1
7 TRUE 11.340604 1
8 TRUE 9.739574 1
9 TRUE 7.383535 <NA>
10 TRUE 7.356584 1
11 FALSE 74.702715 1
12 FALSE 54.497229 0;4
13 FALSE 27.376796 0
14 FALSE 26.234789 0
15 FALSE 12.081466 0
16 FALSE 9.460175 <NA>
17 FALSE 8.824036 1
18 FALSE 8.418049 <NA>
Show code
if (nrow (priority_micro_lncRNAs) > 0 ) {
driver_lncRNAs <- priority_micro_lncRNAs %>%
filter (
same_direction,
avg_log2FC_dam_vs_homeostatic > 0 ,
avg_log2FC_PD_vs_C > 0
)
defender_lncRNAs <- priority_micro_lncRNAs %>%
filter (
same_direction,
avg_log2FC_dam_vs_homeostatic < 0 ,
avg_log2FC_PD_vs_C < 0
)
print (driver_lncRNAs)
print (defender_lncRNAs)
} else {
driver_lncRNAs <- data.frame ()
defender_lncRNAs <- data.frame ()
}
gene gene_symbol_for_plot gencode_gene_id gencode_gene_name
1 MIR223HG MIR223HG ENSG00000274536.10 MIR223HG
2 FCGR1BP FCGR1BP ENSG00000291135.2 FCGR1BP
3 ENSG00000290735 ENSG00000290735 ENSG00000290735.2 ENSG00000290735
4 LINC02397 LINC02397 ENSG00000205056.9 LINC02397
5 ENSG00000308579 ENSG00000308579 ENSG00000308579.1 ENSG00000308579
6 SNHG15 SNHG15 ENSG00000232956.11 SNHG15
lncRNA_type avg_log2FC_dam_vs_homeostatic p_val_adj_dam_vs_homeostatic
1 LINC 0.3735245 0.0045876930
2 LINC 0.6726387 0.0001307758
3 Sense_overlapping 1.0989958 0.0004037941
4 LINC 1.1166999 0.0022463767
5 LINC 1.1730522 0.0175075936
6 LINC 1.1985005 0.0003286326
pct.1_dam pct.2_homeostatic avg_log2FC_PD_vs_C p_val_adj_PD_vs_C pct.1_PD
1 0.280 0.185 1.096231 1.622925e-11 0.273
2 0.160 0.083 1.125438 3.850750e-07 0.144
3 0.094 0.039 1.489717 4.384791e-06 0.083
4 0.077 0.030 1.496335 3.326432e-05 0.066
5 0.068 0.027 1.317259 7.303820e-04 0.065
6 0.071 0.024 1.128459 2.842268e-02 0.062
pct.2_C same_direction priority_score cluster_support max_cluster_log2FC
1 0.165 TRUE 14.597863 1 0.5672749
2 0.073 TRUE 12.096004 1 0.9150627
3 0.032 TRUE 11.340604 1 1.2543783
4 0.023 TRUE 9.739574 1 1.4345364
5 0.025 TRUE 7.383535 <NA> NA
6 0.027 TRUE 7.356584 1 1.2549421
min_cluster_p_val_adj
1 1.528116e-05
2 8.463843e-08
3 7.507519e-07
4 4.081654e-07
5 NA
6 8.254791e-04
gene gene_symbol_for_plot gencode_gene_id gencode_gene_name
1 DLEU1 DLEU1 ENSG00000176124.16 DLEU1
2 ENSG00000287616 ENSG00000287616 ENSG00000287616.2 ENSG00000287616
3 LINC02712 LINC02712 ENSG00000273409.4 LINC02712
4 XACT XACT ENSG00000241743.5 XACT
lncRNA_type avg_log2FC_dam_vs_homeostatic p_val_adj_dam_vs_homeostatic
1 Antisense -0.6635837 1.847872e-10
2 LINC -0.8497620 2.345796e-03
3 LINC -1.5901997 1.455275e-07
4 LINC -1.6214382 4.110563e-06
pct.1_dam pct.2_homeostatic avg_log2FC_PD_vs_C p_val_adj_PD_vs_C pct.1_PD
1 0.698 0.692 -0.6094611 1.680843e-17 0.640
2 0.303 0.369 -1.1227391 3.771405e-21 0.257
3 0.123 0.214 -1.6596257 1.126833e-14 0.106
4 0.160 0.246 -0.9300041 8.087780e-10 0.151
pct.2_C same_direction priority_score cluster_support max_cluster_log2FC
1 0.732 TRUE 27.78085 0 0.6750710
2 0.406 TRUE 25.02571 0 0.8819311
3 0.212 TRUE 24.03502 0 1.7319764
4 0.252 TRUE 17.02971 0 1.3910430
min_cluster_p_val_adj
1 1.580486e-16
2 1.835708e-05
3 3.226066e-13
4 5.826077e-07
Step 14.9: Visualize Top lncRNA Candidates
We visualize the top shared candidates when they exist.
If no shared candidates are found, we visualize the top candidates from the DAM state comparison, or the PD vs C comparison.
Show code
if (nrow (priority_micro_lncRNAs) > 0 ) {
top_lncRNAs_for_plot <- priority_micro_lncRNAs %>%
slice_head (n = 12 ) %>%
pull (gene)
} else {
top_lncRNAs_for_plot <- c ()
}
if (length (top_lncRNAs_for_plot) == 0 && nrow (dam_lncRNA_candidates) > 0 ) {
top_lncRNAs_for_plot <- dam_lncRNA_candidates %>%
slice_head (n = 12 ) %>%
pull (gene)
}
if (length (top_lncRNAs_for_plot) == 0 && nrow (PD_lncRNA_candidates) > 0 ) {
top_lncRNAs_for_plot <- PD_lncRNA_candidates %>%
slice_head (n = 12 ) %>%
pull (gene)
}
top_lncRNAs_for_plot <- intersect (top_lncRNAs_for_plot, rownames (SO_micro))
top_lncRNAs_for_plot
[1] "DLEU1" "ENSG00000287616" "LINC02712" "XACT"
[5] "MIR223HG" "FCGR1BP" "ENSG00000290735" "LINC02397"
[9] "ENSG00000308579" "SNHG15" "ADAM7-AS1" "NEAT1"
Show code
if (length (top_lncRNAs_for_plot) > 0 ) {
FeaturePlot (
SO_micro,
features = top_lncRNAs_for_plot,
reduction = "umap_micro_harmony" ,
ncol = 4 ,
pt.size = 0.5
)
} else {
message ("No lncRNA candidates available for FeaturePlot." )
}
Show code
if (length (top_lncRNAs_for_plot) > 0 ) {
DotPlot (
SO_micro,
features = top_lncRNAs_for_plot,
group.by = "micro_state_refined"
) +
RotatedAxis () +
labs (title = "Top lncRNA Candidates by Refined Microglia State" )
} else {
message ("No lncRNA candidates available for DotPlot." )
}
Show code
if (length (top_lncRNAs_for_plot) > 0 ) {
DotPlot (
SO_micro,
features = top_lncRNAs_for_plot,
group.by = "condition"
) +
RotatedAxis () +
labs (title = "Top lncRNA Candidates by Condition" )
} else {
message ("No lncRNA candidates available for DotPlot." )
}
Step 14.10: Save Microglia DE and lncRNA Discovery Results
Show code
saveRDS (
SO_micro,
file.path (results_dir, "SO_micro_step14_lncRNA_analysis.rds" )
)
saveRDS (
SO_micro_main_DE,
file.path (results_dir, "SO_micro_step14_main_DE_subset.rds" )
)
write.csv (
micro_state_cluster_summary,
file.path (results_dir, "Step14_micro_state_cluster_summary.csv" ),
row.names = FALSE
)
write.csv (
micro_state_group_summary,
file.path (results_dir, "Step14_micro_state_group_summary.csv" ),
row.names = FALSE
)
if (nrow (micro_dam_vs_homeostatic_annotated) > 0 ) {
write.csv (
micro_dam_vs_homeostatic_annotated,
file.path (results_dir, "Step14_DE_DAM_vs_homeostatic.csv" ),
row.names = FALSE
)
}
write.csv (
micro_PD_vs_C_annotated,
file.path (results_dir, "Step14_DE_PD_vs_C_microglia.csv" ),
row.names = FALSE
)
write.csv (
micro_cluster_markers_annotated,
file.path (results_dir, "Step14_micro_cluster_markers_annotated.csv" ),
row.names = FALSE
)
if (nrow (dam_lncRNA_candidates) > 0 ) {
write.csv (
dam_lncRNA_candidates,
file.path (results_dir, "Step14_DAM_lncRNA_candidates.csv" ),
row.names = FALSE
)
}
write.csv (
PD_lncRNA_candidates,
file.path (results_dir, "Step14_PD_lncRNA_candidates.csv" ),
row.names = FALSE
)
write.csv (
cluster_lncRNA_candidates,
file.path (results_dir, "Step14_cluster_lncRNA_candidates.csv" ),
row.names = FALSE
)
if (nrow (priority_micro_lncRNAs) > 0 ) {
write.csv (
priority_micro_lncRNAs,
file.path (results_dir, "Step14_priority_micro_lncRNA_candidates.csv" ),
row.names = FALSE
)
write.csv (
driver_lncRNAs,
file.path (results_dir, "Step14_driver_like_lncRNA_candidates.csv" ),
row.names = FALSE
)
write.csv (
defender_lncRNAs,
file.path (results_dir, "Step14_defender_like_lncRNA_candidates.csv" ),
row.names = FALSE
)
}
write.csv (
data.frame (
setting = c (
"input_object" ,
"state_definition" ,
"dam_clusters" ,
"homeostatic_clusters" ,
"other_excluded_from_main_DE_clusters" ,
"combined_activation_score_usage" ,
"min_pct_use" ,
"logfc_threshold_use" ,
"p_adj_cutoff" ,
"lncRNA_logfc_cutoff" ,
"n_main_DE_cells" ,
"n_DAM_lncRNA_candidates" ,
"n_PD_lncRNA_candidates" ,
"n_cluster_lncRNA_candidates" ,
"n_priority_lncRNAs" ,
"n_driver_like_lncRNAs" ,
"n_defender_like_lncRNAs"
),
value = c (
input_path,
"cluster-based refined microglia states" ,
paste (dam_clusters, collapse = "," ),
paste (homeostatic_clusters, collapse = "," ),
paste (other_clusters, collapse = "," ),
"z-scored summary/visualization only; not used for main DE grouping" ,
min_pct_use,
logfc_threshold_use,
p_adj_cutoff,
lncRNA_logfc_cutoff,
ncol (SO_micro_main_DE),
if (exists ("dam_lncRNA_candidates" )) nrow (dam_lncRNA_candidates) else 0 ,
nrow (PD_lncRNA_candidates),
nrow (cluster_lncRNA_candidates),
if (exists ("priority_micro_lncRNAs" )) nrow (priority_micro_lncRNAs) else 0 ,
if (exists ("driver_lncRNAs" )) nrow (driver_lncRNAs) else 0 ,
if (exists ("defender_lncRNAs" )) nrow (defender_lncRNAs) else 0
)
),
file.path (results_dir, "Step14_analysis_settings.csv" ),
row.names = FALSE
)
Interpretation Note
This analysis prioritizes lncRNAs that are associated with microglia state and/or PD status.
The strongest candidates are not simply the most significant DE genes. They are candidates that satisfy several criteria:
1. Context-based lncRNA annotation from Step 13 using GENCODE v49
2. Differential expression in cluster-defined DAM microglia
3. Differential expression in PD microglia
4. Optional support from microglia subcluster markers
5. Interpretable direction of change
The final candidate list should be treated as a hypothesis-generating result. Any top lncRNA should be checked manually using genome annotation databases and literature before being described as novel or biologically meaningful.