Step 14: Microglia lncRNA Analysis

Cluster-based differential expression and lncRNA candidate discovery in microglia states

1 Setup: Environment and Data

This file starts from the microglia object generated in Step 13.

The input object should already contain:

microglia cells
microglia subclusters
homeostatic / DAM / inflammatory / phagocytic / interferon response module scores
condition information

The main biological goal is to identify lncRNAs associated with a cluster-defined microglia state transition:

Homeostatic microglia
→ activated / disease-associated microglia (DAM)

This is a microglia state analysis, not a broad cell-type discovery analysis.

Show code

library(Seurat)
library(tidyverse)
library(patchwork)

set.seed(1234)

project_dir <- "/Users/yoshimurasouhei/Downloads/010_school/4年生/bioinfomaticsリサーチクラークシップ/PD_2026"

input_path <- file.path(
  project_dir,
  "results",
  "Step13_microglia_harmony_reclustering",
  "SO_micro_step13_fernando_harmony_reclustered_scored.rds"
)

results_dir <- file.path(
  project_dir,
  "results",
  "Step14_microglia_lncRNA_exploration"
)

dir.create(results_dir, recursive = TRUE, showWarnings = FALSE)

SO_micro <- readRDS(input_path)

DefaultAssay(SO_micro) <- "RNA"

if ("Assay5" %in% class(SO_micro[["RNA"]])) {
  SO_micro <- JoinLayers(SO_micro, assay = "RNA")
}

SO_micro

An object of class Seurat 
87674 features across 3439 samples within 2 assays 
Active assay: RNA (47773 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 1 other assay present: SCT
 7 dimensional reductions calculated: pca, umap, harmony, pca_micro, harmony_micro, umap_micro_harmony, umap_micro_unintegrated

Show code

required_cols <- c(
  "seurat_clusters_harmony",
  "condition",
  "sample",
  "sex",
  "Micro_homeostatic_score1",
  "DAM_score1",
  "Inflammatory_score1",
  "Phagocytic_score1",
  "Interferon_response_score1",
  "umap_micro_harmony"
)

missing_cols <- setdiff(required_cols, c(colnames(SO_micro@meta.data), Reductions(SO_micro)))

if (length(missing_cols) > 0) {
  stop(
    paste(
      "Missing required Step 13 outputs:",
      paste(missing_cols, collapse = ", ")
    )
  )
}

table(SO_micro$seurat_clusters_harmony)


   0    1    2    3    4    5 
1523 1191  406  153  142   24

Show code

table(SO_micro$condition)


   C   PD 
1748 1691

Show code

stopifnot("umap_micro_harmony" %in% Reductions(SO_micro))
stopifnot("seurat_clusters_harmony" %in% colnames(SO_micro@meta.data))
stopifnot(!any(is.na(SO_micro$seurat_clusters_harmony)))

2 Step 14.1: Refine Microglia State Annotation

Here, the main state definition is cluster-based, not quartile-based.

Based on Step 13 cluster summaries, you should define your main groups:

DAM:
  [update clusters]

Homeostatic:
  [update clusters]

Other / excluded from main binary DE:
  [update clusters]

Clusters excluded from the main binary DE comparison might represent background-stress-like or minor/intermediate states rather than the two extreme states used for candidate prioritization.

The combined activation score is calculated only for visualization and summary. It is not used to define the main DE groups.

Show code

SO_micro$micro_cluster <- as.character(SO_micro$seurat_clusters_harmony)

dam_clusters <- c("1")
homeostatic_clusters <- c("0")
other_clusters <- c("2", "3", "4", "5")

if (length(c(dam_clusters, homeostatic_clusters, other_clusters)) == 0) {
  message("Please define DAM, Homeostatic, and Other clusters based on Step 13.")
  SO_micro$micro_state_refined <- "Unassigned"
} else {
  SO_micro$micro_state_refined <- case_when(
    SO_micro$micro_cluster %in% dam_clusters ~ "DAM",
    SO_micro$micro_cluster %in% homeostatic_clusters ~ "Homeostatic",
    SO_micro$micro_cluster %in% other_clusters ~ "Other",
    TRUE ~ "Unassigned"
  )
}

SO_micro$micro_state_refined <- factor(
  SO_micro$micro_state_refined,
  levels = c("DAM", "Homeostatic", "Other", "Unassigned")
)

table(SO_micro$micro_cluster, SO_micro$micro_state_refined)

   
     DAM Homeostatic Other Unassigned
  0    0        1523     0          0
  1 1191           0     0          0
  2    0           0   406          0
  3    0           0   153          0
  4    0           0   142          0
  5    0           0    24          0

Show code

table(SO_micro$micro_state_refined, SO_micro$condition)

             
                C  PD
  DAM         565 626
  Homeostatic 745 778
  Other       438 287
  Unassigned    0   0

Show code

activation_score_matrix <- SO_micro@meta.data[, c(
  "DAM_score1",
  "Inflammatory_score1",
  "Phagocytic_score1",
  "Interferon_response_score1"
)]

SO_micro$combined_activation_score_z <- rowMeans(
  scale(activation_score_matrix),
  na.rm = TRUE
)

summary(SO_micro$combined_activation_score_z)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-1.4047 -0.5178 -0.1057  0.0000  0.4396  3.2179

Show code

micro_state_cluster_summary <- SO_micro@meta.data %>%
  mutate(
    micro_cluster = as.character(seurat_clusters_harmony),
    micro_state_refined = as.character(micro_state_refined)
  ) %>%
  group_by(micro_cluster, micro_state_refined) %>%
  summarise(
    n_cells = n(),
    mean_homeostatic = mean(Micro_homeostatic_score1, na.rm = TRUE),
    mean_combined_activation_z = mean(combined_activation_score_z, na.rm = TRUE),
    mean_dam = mean(DAM_score1, na.rm = TRUE),
    mean_inflammatory = mean(Inflammatory_score1, na.rm = TRUE),
    mean_phagocytic = mean(Phagocytic_score1, na.rm = TRUE),
    mean_interferon = mean(Interferon_response_score1, na.rm = TRUE),
    percent_PD = mean(condition == "PD", na.rm = TRUE),
    percent_C = mean(condition == "C", na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_combined_activation_z))

DT::datatable(
  micro_state_cluster_summary,
  rownames = FALSE,
  filter = "top",
  options = list(
    pageLength = 10,
    autoWidth = TRUE,
    scrollX = TRUE
  )
)

Show code

micro_state_group_summary <- SO_micro@meta.data %>%
  mutate(
    micro_cluster = as.character(seurat_clusters_harmony),
    micro_state_refined = as.character(micro_state_refined)
  ) %>%
  group_by(micro_state_refined) %>%
  summarise(
    n_cells = n(),
    mean_homeostatic = mean(Micro_homeostatic_score1, na.rm = TRUE),
    mean_combined_activation_z = mean(combined_activation_score_z, na.rm = TRUE),
    mean_dam = mean(DAM_score1, na.rm = TRUE),
    mean_inflammatory = mean(Inflammatory_score1, na.rm = TRUE),
    mean_phagocytic = mean(Phagocytic_score1, na.rm = TRUE),
    mean_interferon = mean(Interferon_response_score1, na.rm = TRUE),
    percent_PD = mean(condition == "PD", na.rm = TRUE),
    percent_C = mean(condition == "C", na.rm = TRUE),
    clusters_included = paste(sort(unique(micro_cluster)), collapse = ", "),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_combined_activation_z))

DT::datatable(
  micro_state_group_summary,
  rownames = FALSE,
  filter = "top",
  options = list(
    pageLength = 10,
    autoWidth = TRUE,
    scrollX = TRUE
  )
)

Show code

p_state <- DimPlot(
  SO_micro,
  reduction = "umap_micro_harmony",
  group.by = "micro_state_refined",
  label = TRUE,
  repel = TRUE,
  pt.size = 0.5
) +
  labs(title = "Cluster-Based Refined Microglia State Annotation")

p_score <- FeaturePlot(
  SO_micro,
  features = "combined_activation_score_z",
  reduction = "umap_micro_harmony",
  pt.size = 0.5
) +
  labs(title = "Z-scored Combined Microglia Activation Score")

p_state | p_score

Show code

SO_micro$micro_DE_group <- case_when(
  SO_micro$micro_state_refined == "DAM" ~ "DAM",
  SO_micro$micro_state_refined == "Homeostatic" ~ "Homeostatic",
  TRUE ~ NA_character_
)

SO_micro$micro_DE_group <- factor(
  SO_micro$micro_DE_group,
  levels = c("Homeostatic", "DAM")
)

table(SO_micro$micro_DE_group, useNA = "ifany")


Homeostatic         DAM        <NA> 
       1523        1191         725

Show code

table(SO_micro$micro_DE_group, SO_micro$condition, useNA = "ifany")

             
                C  PD
  Homeostatic 745 778
  DAM         565 626
  <NA>        438 287

Show code

write.csv(
  micro_state_cluster_summary,
  file.path(results_dir, "Step14_1_refined_microglia_state_cluster_summary.csv"),
  row.names = FALSE
)

write.csv(
  micro_state_group_summary,
  file.path(results_dir, "Step14_1_refined_microglia_state_group_summary.csv"),
  row.names = FALSE
)

write.csv(
  as.data.frame.matrix(table(SO_micro$micro_state_refined, SO_micro$condition)),
  file.path(results_dir, "Step14_1_refined_microglia_state_condition_table.csv")
)

write.csv(
  as.data.frame.matrix(table(SO_micro$micro_cluster, SO_micro$micro_state_refined)),
  file.path(results_dir, "Step14_1_cluster_to_refined_state_table.csv")
)

3 Step 14.2: Define Comparison Groups for Differential Expression

We define two main comparisons:

1. DAM vs Homeostatic
2. PD vs C within microglia

The first comparison prioritizes cluster-defined microglia state-associated genes.
The second comparison prioritizes disease-associated genes across all microglia.

Show code

SO_micro_main_DE <- subset(
  SO_micro,
  subset = !is.na(micro_DE_group)
)

SO_micro_main_DE$micro_DE_group <- droplevels(SO_micro_main_DE$micro_DE_group)

table(SO_micro_main_DE$micro_DE_group)


Homeostatic         DAM 
       1523        1191

Show code

table(SO_micro_main_DE$micro_DE_group, SO_micro_main_DE$condition)

             
                C  PD
  Homeostatic 745 778
  DAM         565 626

Show code

min_pct_use <- 0.05
logfc_threshold_use <- 0.10
p_adj_cutoff <- 0.05
lncRNA_logfc_cutoff <- 0.25

min_pct_use

[1] 0.05

Show code

logfc_threshold_use

[1] 0.1

Show code

p_adj_cutoff

[1] 0.05

Show code

lncRNA_logfc_cutoff

[1] 0.25

4 Step 14.3: Differential Expression Analysis

We run DE for:

DAM vs Homeostatic
PD vs C

Show code

if (length(unique(SO_micro_main_DE$micro_DE_group)) == 2) {
  Idents(SO_micro_main_DE) <- SO_micro_main_DE$micro_DE_group

  micro_dam_vs_homeostatic_DE <- FindMarkers(
    SO_micro_main_DE,
    ident.1 = "DAM",
    ident.2 = "Homeostatic",
    min.pct = min_pct_use,
    logfc.threshold = logfc_threshold_use,
    test.use = "wilcox"
  ) %>%
    rownames_to_column("gene") %>%
    arrange(p_val_adj, desc(avg_log2FC))

  print(micro_dam_vs_homeostatic_DE %>% slice_head(n = 20))
} else {
  message("Not enough groups for DAM vs Homeostatic DE. Make sure to define clusters in Step 14.1.")
  micro_dam_vs_homeostatic_DE <- data.frame(gene = character())
}

      gene         p_val avg_log2FC pct.1 pct.2     p_val_adj
1      FTL 1.095949e-228   2.454705 0.942 0.512 5.235677e-224
2    RPS19 4.141249e-212   2.193070 0.898 0.387 1.978399e-207
3     APOE 3.479507e-195   2.364915 0.864 0.389 1.662265e-190
4    RPL11 8.837063e-193   2.673218 0.771 0.225 4.221730e-188
5    RPL13 8.781991e-187   2.205156 0.872 0.406 4.195421e-182
6     TPT1 3.702848e-185   2.132740 0.879 0.422 1.768962e-180
7    RPL19 1.034972e-182   2.498880 0.767 0.222 4.944371e-178
8    RPS23 1.465859e-179   2.810964 0.691 0.156 7.002849e-175
9   EEF1A1 2.251186e-179   1.896427 0.890 0.446 1.075459e-174
10   RPL32 4.398240e-179   2.758371 0.709 0.177 2.101171e-174
11   RPLP1 9.508172e-177   2.145385 0.853 0.362 4.542339e-172
12    CD74 8.270541e-175   1.716980 0.930 0.531 3.951086e-170
13   RPS27 2.493567e-172   2.938598 0.679 0.160 1.191252e-167
14    C1QC 4.047387e-172   2.431164 0.748 0.242 1.933558e-167
15 HLA-DRA 1.246920e-171   2.479459 0.743 0.233 5.956910e-167
16  TMSB4X 8.999113e-171   1.903514 0.888 0.445 4.299146e-166
17    RPS2 2.917229e-170   2.512467 0.714 0.191 1.393648e-165
18     B2M 1.305739e-168   2.181601 0.793 0.303 6.237909e-164
19   RPL7A 1.806646e-164   2.573908 0.672 0.160 8.630890e-160
20   RPS28 1.256351e-163   2.047980 0.816 0.323 6.001966e-159

Show code

Idents(SO_micro) <- SO_micro$condition

micro_PD_vs_C_DE <- FindMarkers(
  SO_micro,
  ident.1 = "PD",
  ident.2 = "C",
  min.pct = min_pct_use,
  logfc.threshold = logfc_threshold_use,
  test.use = "wilcox"
) %>%
  rownames_to_column("gene") %>%
  arrange(p_val_adj, desc(avg_log2FC))

DT::datatable(
  micro_PD_vs_C_DE,
  rownames = FALSE,
  filter = "top",
  options = list(
    pageLength = 10,
    autoWidth = TRUE,
    scrollX = TRUE
  )
)

5 Step 14.4: Cluster-Specific DE Analysis

Here we extract markers for each microglia subcluster.

These are subcluster/state markers, not broad microglia identity markers.

Show code

Idents(SO_micro) <- SO_micro$seurat_clusters_harmony

micro_cluster_markers <- FindAllMarkers(
  SO_micro,
  only.pos = TRUE,
  min.pct = min_pct_use,
  logfc.threshold = logfc_threshold_use,
  test.use = "wilcox"
) %>%
  arrange(cluster, p_val_adj, desc(avg_log2FC))

micro_cluster_markers %>% head(30)

Show code

top_micro_cluster_markers <- micro_cluster_markers %>%
  group_by(cluster) %>%
  slice_min(order_by = p_val_adj, n = 20, with_ties = FALSE) %>%
  ungroup()

top_micro_cluster_markers

6 Step 14.5 / 14.6: Annotate DE Results with lncRNA Context

Show code

lncRNA_annotation_path <- file.path(
  project_dir,
  "results",
  "Step13_microglia_harmony_reclustering",
  "Step13_microglia_lncRNA_gene_annotation_context_based.csv"
)

if (!file.exists(lncRNA_annotation_path)) {
  stop(
    paste(
      "Step 13 context-based lncRNA annotation file not found:",
      lncRNA_annotation_path
    )
  )
}

lncRNA_gene_annotation <- read.csv(
  lncRNA_annotation_path,
  stringsAsFactors = FALSE
) %>%
  as_tibble()

required_lncRNA_anno_cols <- c(
  "gene",
  "gencode_gene_id",
  "gencode_gene_name",
  "lncRNA_type",
  "is_lncRNA"
)

missing_lncRNA_anno_cols <- setdiff(
  required_lncRNA_anno_cols,
  colnames(lncRNA_gene_annotation)
)

if (length(missing_lncRNA_anno_cols) > 0) {
  stop(
    paste(
      "Missing columns in Step 13 lncRNA annotation:",
      paste(missing_lncRNA_anno_cols, collapse = ", ")
    )
  )
}

lncRNA_gene_annotation <- lncRNA_gene_annotation %>%
  dplyr::select(
    gene,
    gencode_gene_id,
    gencode_gene_name,
    lncRNA_type,
    is_lncRNA
  ) %>%
  distinct(gene, .keep_all = TRUE)

lncRNA_gene_annotation %>%
  count(lncRNA_type, name = "n_genes") %>%
  arrange(desc(n_genes))

Show code

annotate_de_table_with_lncRNA_context <- function(de_table) {
  if (nrow(de_table) == 0) return(de_table)
  
  de_table %>%
    left_join(
      lncRNA_gene_annotation,
      by = "gene"
    ) %>%
    mutate(
      is_lncRNA = if_else(is.na(is_lncRNA), FALSE, is_lncRNA),
      lncRNA_type = if_else(
        is.na(lncRNA_type),
        "Not_lncRNA_or_unmatched",
        lncRNA_type
      ),
      gene_symbol_for_plot = case_when(
        !is.na(gencode_gene_name) ~ gencode_gene_name,
        TRUE ~ gene
      )
    )
}

micro_dam_vs_homeostatic_annotated <- annotate_de_table_with_lncRNA_context(
  micro_dam_vs_homeostatic_DE
)

micro_PD_vs_C_annotated <- annotate_de_table_with_lncRNA_context(
  micro_PD_vs_C_DE
)

micro_cluster_markers_annotated <- annotate_de_table_with_lncRNA_context(
  micro_cluster_markers
)

7 Step 14.7: Extract Annotated lncRNA Candidates

We extract lncRNA-like candidates from each comparison.

Show code

if (nrow(micro_dam_vs_homeostatic_annotated) > 0) {
  dam_lncRNA_candidates <- micro_dam_vs_homeostatic_annotated %>%
    filter(
      is_lncRNA,
      p_val_adj < p_adj_cutoff,
      abs(avg_log2FC) >= lncRNA_logfc_cutoff
    ) %>%
    arrange(p_val_adj, desc(abs(avg_log2FC)))
} else {
  dam_lncRNA_candidates <- data.frame(gene = character())
}

PD_lncRNA_candidates <- micro_PD_vs_C_annotated %>%
  filter(
    is_lncRNA,
    p_val_adj < p_adj_cutoff,
    abs(avg_log2FC) >= lncRNA_logfc_cutoff
  ) %>%
  arrange(p_val_adj, desc(abs(avg_log2FC)))

cluster_lncRNA_candidates <- micro_cluster_markers_annotated %>%
  filter(
    is_lncRNA,
    p_val_adj < p_adj_cutoff,
    abs(avg_log2FC) >= lncRNA_logfc_cutoff
  ) %>%
  arrange(cluster, p_val_adj, desc(avg_log2FC))

if (nrow(dam_lncRNA_candidates) > 0) {
  print(dam_lncRNA_candidates %>%
    dplyr::select(gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
    slice_head(n = 30))
}

              gene gene_symbol_for_plot lncRNA_type avg_log2FC pct.1 pct.2
1           MALAT1               MALAT1        LINC  0.6714575 1.000 1.000
2  ENSG00000296425      ENSG00000296425   Antisense  1.8225975 0.630 0.225
3        ADAM7-AS1            ADAM7-AS1   Antisense  1.8091683 0.439 0.128
4           TALAM1               TALAM1        LINC  1.5954060 0.351 0.112
5         HLA-DRB6             HLA-DRB6        LINC  2.2677500 0.189 0.032
6            NEAT1                NEAT1        LINC -0.6334787 0.898 0.915
7           SNHG29               SNHG29   Antisense  1.1624198 0.296 0.104
8  ENSG00000309976      ENSG00000309976   Antisense  1.6934093 0.242 0.076
9            SNHG6                SNHG6   Antisense  1.2683480 0.262 0.092
10           SNHG8                SNHG8   Antisense  1.9131695 0.167 0.035
11 ENSG00000286618      ENSG00000286618   Antisense  1.8047126 0.231 0.079
12            XIST                 XIST        LINC -1.2976366 0.306 0.457
13 ENSG00000265987      ENSG00000265987   Antisense  1.9337850 0.160 0.039
14 ENSG00000228655      ENSG00000228655   Antisense  1.6087027 0.187 0.056
15           ZFAS1                ZFAS1   Antisense  1.1553797 0.223 0.083
16     TBC1D22A-DT          TBC1D22A-DT        LINC  3.0025319 0.100 0.015
17 ENSG00000297005      ENSG00000297005   Antisense  1.2346967 0.202 0.074
18 ENSG00000288853      ENSG00000288853   Antisense  1.4891391 0.154 0.046
19      ABHD15-AS1           ABHD15-AS1   Antisense  1.0700029 0.235 0.098
20 ENSG00000307379      ENSG00000307379   Antisense  1.4734386 0.175 0.060
21 ENSG00000302619      ENSG00000302619        LINC -1.2335708 0.221 0.356
22 ENSG00000307107      ENSG00000307107   Antisense  1.5725877 0.134 0.040
23 ENSG00000286145      ENSG00000286145   Antisense  1.6920963 0.106 0.024
24 ENSG00000300996      ENSG00000300996   Antisense  1.9160502 0.105 0.024
25 ENSG00000296449      ENSG00000296449   Antisense  2.5243396 0.084 0.014
26 ENSG00000274441      ENSG00000274441   Antisense  1.2741147 0.132 0.041
27            GAS5                 GAS5   Antisense  1.0910310 0.183 0.075
28       ZFHX3-AS1            ZFHX3-AS1   Antisense  1.5964842 0.107 0.028
29 ENSG00000250362      ENSG00000250362   Antisense  1.5848590 0.118 0.035
30      AKAP13-AS1           AKAP13-AS1   Antisense  1.7004182 0.089 0.019
      p_val_adj
1  4.209140e-98
2  4.258754e-98
3  2.558766e-67
4  4.394103e-45
5  7.580729e-36
6  2.559256e-31
7  1.547894e-29
8  2.700286e-28
9  2.406287e-26
10 2.563382e-26
11 8.059606e-24
12 1.692881e-23
13 3.104934e-22
14 2.473728e-21
15 1.028157e-18
16 4.071630e-18
17 2.068147e-17
18 9.380344e-17
19 9.421254e-17
20 1.746406e-16
21 5.141314e-14
22 5.506287e-14
23 5.739029e-14
24 1.036397e-13
25 2.536082e-13
26 1.325480e-12
27 2.289196e-12
28 3.267779e-12
29 3.748365e-12
30 5.865161e-12

Show code

PD_lncRNA_candidates %>%
  dplyr::select(gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  slice_head(n = 30)

Show code

cluster_lncRNA_candidates %>%
  dplyr::select(cluster, gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  slice_head(n = 30)

8 Step 14.8: Prioritize lncRNAs Shared Across Comparisons

The strongest candidates are lncRNAs associated with both the activated/DAM state and PD status.

Show code

if (nrow(dam_lncRNA_candidates) > 0) {
  priority_micro_lncRNAs <- dam_lncRNA_candidates %>%
    dplyr::select(
      gene,
      gene_symbol_for_plot,
      gencode_gene_id,
      gencode_gene_name,
      lncRNA_type,
      avg_log2FC_dam_vs_homeostatic = avg_log2FC,
      p_val_adj_dam_vs_homeostatic = p_val_adj,
      pct.1_dam = pct.1,
      pct.2_homeostatic = pct.2
    ) %>%
    inner_join(
      PD_lncRNA_candidates %>%
        dplyr::select(
          gene,
          avg_log2FC_PD_vs_C = avg_log2FC,
          p_val_adj_PD_vs_C = p_val_adj,
          pct.1_PD = pct.1,
          pct.2_C = pct.2
        ),
      by = "gene"
    ) %>%
    mutate(
      same_direction = sign(avg_log2FC_dam_vs_homeostatic) == sign(avg_log2FC_PD_vs_C),
      priority_score =
        -log10(p_val_adj_dam_vs_homeostatic + 1e-300) +
        -log10(p_val_adj_PD_vs_C + 1e-300) +
        abs(avg_log2FC_dam_vs_homeostatic) +
        abs(avg_log2FC_PD_vs_C)
    ) %>%
    arrange(desc(same_direction), desc(priority_score))

  print(priority_micro_lncRNAs)
} else {
  priority_micro_lncRNAs <- data.frame(gene = character())
  message("DAM clusters not defined, so priority shared lncRNAs cannot be calculated.")
}

              gene gene_symbol_for_plot    gencode_gene_id gencode_gene_name
1            DLEU1                DLEU1 ENSG00000176124.16             DLEU1
2  ENSG00000287616      ENSG00000287616  ENSG00000287616.2   ENSG00000287616
3        LINC02712            LINC02712  ENSG00000273409.4         LINC02712
4             XACT                 XACT  ENSG00000241743.5              XACT
5         MIR223HG             MIR223HG ENSG00000274536.10          MIR223HG
6          FCGR1BP              FCGR1BP  ENSG00000291135.2           FCGR1BP
7  ENSG00000290735      ENSG00000290735  ENSG00000290735.2   ENSG00000290735
8        LINC02397            LINC02397  ENSG00000205056.9         LINC02397
9  ENSG00000308579      ENSG00000308579  ENSG00000308579.1   ENSG00000308579
10          SNHG15               SNHG15 ENSG00000232956.11            SNHG15
11       ADAM7-AS1            ADAM7-AS1  ENSG00000253535.6         ADAM7-AS1
12           NEAT1                NEAT1 ENSG00000245532.13             NEAT1
13 ENSG00000302619      ENSG00000302619  ENSG00000302619.1   ENSG00000302619
14         LNCAROD              LNCAROD ENSG00000231131.11           LNCAROD
15       LINC02798            LINC02798  ENSG00000227082.4         LINC02798
16    RNASEH2B-AS1         RNASEH2B-AS1 ENSG00000233672.10      RNASEH2B-AS1
17       LINC01678            LINC01678  ENSG00000225331.3         LINC01678
18 ENSG00000289899      ENSG00000289899  ENSG00000289899.2   ENSG00000289899
         lncRNA_type avg_log2FC_dam_vs_homeostatic p_val_adj_dam_vs_homeostatic
1          Antisense                    -0.6635837                 1.847872e-10
2               LINC                    -0.8497620                 2.345796e-03
3               LINC                    -1.5901997                 1.455275e-07
4               LINC                    -1.6214382                 4.110563e-06
5               LINC                     0.3735245                 4.587693e-03
6               LINC                     0.6726387                 1.307758e-04
7  Sense_overlapping                     1.0989958                 4.037941e-04
8               LINC                     1.1166999                 2.246377e-03
9               LINC                     1.1730522                 1.750759e-02
10              LINC                     1.1985005                 3.286326e-04
11         Antisense                     1.8091683                 2.558766e-67
12              LINC                    -0.6334787                 2.559256e-31
13              LINC                    -1.2335708                 5.141314e-14
14              LINC                    -0.6106552                 3.566862e-06
15              LINC                    -1.0441560                 9.584445e-10
16         Antisense                     0.8093342                 2.578993e-02
17         Antisense                     0.5889772                 5.210789e-05
18              LINC                     1.2162408                 3.067046e-02
   pct.1_dam pct.2_homeostatic avg_log2FC_PD_vs_C p_val_adj_PD_vs_C pct.1_PD
1      0.698             0.692         -0.6094611      1.680843e-17    0.640
2      0.303             0.369         -1.1227391      3.771405e-21    0.257
3      0.123             0.214         -1.6596257      1.126833e-14    0.106
4      0.160             0.246         -0.9300041      8.087780e-10    0.151
5      0.280             0.185          1.0962312      1.622925e-11    0.273
6      0.160             0.083          1.1254378      3.850750e-07    0.144
7      0.094             0.039          1.4897168      4.384791e-06    0.083
8      0.077             0.030          1.4963353      3.326432e-05    0.066
9      0.068             0.027          1.3172593      7.303820e-04    0.065
10     0.071             0.024          1.1284589      2.842268e-02    0.062
11     0.439             0.128         -0.9963488      4.951895e-06    0.213
12     0.898             0.915          0.5400884      1.854489e-23    0.936
13     0.221             0.356          0.3912819      3.443363e-13    0.348
14     0.636             0.659          0.6367654      2.886327e-20    0.704
15     0.411             0.483          0.2786523      1.818758e-02    0.484
16     0.071             0.030         -2.0492466      9.704099e-06    0.024
17     0.112             0.047         -1.4055090      2.841494e-03    0.050
18     0.051             0.017         -2.4140107      5.314744e-04    0.017
   pct.2_C same_direction priority_score
1    0.732           TRUE      27.780846
2    0.406           TRUE      25.025708
3    0.212           TRUE      24.035021
4    0.252           TRUE      17.029712
5    0.165           TRUE      14.597863
6    0.073           TRUE      12.096004
7    0.032           TRUE      11.340604
8    0.023           TRUE       9.739574
9    0.025           TRUE       7.383535
10   0.027           TRUE       7.356584
11   0.292          FALSE      74.702715
12   0.894          FALSE      54.497229
13   0.203          FALSE      27.376796
14   0.558          FALSE      26.234789
15   0.384          FALSE      12.081466
16   0.070          FALSE       9.460175
17   0.097          FALSE       8.824036
18   0.053          FALSE       8.418049

Show code

if (nrow(priority_micro_lncRNAs) > 0) {
  cluster_lncRNA_support <- cluster_lncRNA_candidates %>%
    group_by(gene) %>%
    summarise(
      cluster_support = paste(sort(unique(cluster)), collapse = ";"),
      max_cluster_log2FC = max(avg_log2FC, na.rm = TRUE),
      min_cluster_p_val_adj = min(p_val_adj, na.rm = TRUE),
      .groups = "drop"
    )

  priority_micro_lncRNAs <- priority_micro_lncRNAs %>%
    left_join(cluster_lncRNA_support, by = "gene") %>%
    arrange(desc(same_direction), desc(priority_score))

  print(
    priority_micro_lncRNAs %>%
      dplyr::select(
        gene,
        gene_symbol_for_plot,
        avg_log2FC_dam_vs_homeostatic,
        avg_log2FC_PD_vs_C,
        p_val_adj_dam_vs_homeostatic,
        p_val_adj_PD_vs_C,
        same_direction,
        priority_score,
        cluster_support
      ) %>%
      slice_head(n = 30)
  )
}

              gene gene_symbol_for_plot avg_log2FC_dam_vs_homeostatic
1            DLEU1                DLEU1                    -0.6635837
2  ENSG00000287616      ENSG00000287616                    -0.8497620
3        LINC02712            LINC02712                    -1.5901997
4             XACT                 XACT                    -1.6214382
5         MIR223HG             MIR223HG                     0.3735245
6          FCGR1BP              FCGR1BP                     0.6726387
7  ENSG00000290735      ENSG00000290735                     1.0989958
8        LINC02397            LINC02397                     1.1166999
9  ENSG00000308579      ENSG00000308579                     1.1730522
10          SNHG15               SNHG15                     1.1985005
11       ADAM7-AS1            ADAM7-AS1                     1.8091683
12           NEAT1                NEAT1                    -0.6334787
13 ENSG00000302619      ENSG00000302619                    -1.2335708
14         LNCAROD              LNCAROD                    -0.6106552
15       LINC02798            LINC02798                    -1.0441560
16    RNASEH2B-AS1         RNASEH2B-AS1                     0.8093342
17       LINC01678            LINC01678                     0.5889772
18 ENSG00000289899      ENSG00000289899                     1.2162408
   avg_log2FC_PD_vs_C p_val_adj_dam_vs_homeostatic p_val_adj_PD_vs_C
1          -0.6094611                 1.847872e-10      1.680843e-17
2          -1.1227391                 2.345796e-03      3.771405e-21
3          -1.6596257                 1.455275e-07      1.126833e-14
4          -0.9300041                 4.110563e-06      8.087780e-10
5           1.0962312                 4.587693e-03      1.622925e-11
6           1.1254378                 1.307758e-04      3.850750e-07
7           1.4897168                 4.037941e-04      4.384791e-06
8           1.4963353                 2.246377e-03      3.326432e-05
9           1.3172593                 1.750759e-02      7.303820e-04
10          1.1284589                 3.286326e-04      2.842268e-02
11         -0.9963488                 2.558766e-67      4.951895e-06
12          0.5400884                 2.559256e-31      1.854489e-23
13          0.3912819                 5.141314e-14      3.443363e-13
14          0.6367654                 3.566862e-06      2.886327e-20
15          0.2786523                 9.584445e-10      1.818758e-02
16         -2.0492466                 2.578993e-02      9.704099e-06
17         -1.4055090                 5.210789e-05      2.841494e-03
18         -2.4140107                 3.067046e-02      5.314744e-04
   same_direction priority_score cluster_support
1            TRUE      27.780846               0
2            TRUE      25.025708               0
3            TRUE      24.035021               0
4            TRUE      17.029712               0
5            TRUE      14.597863               1
6            TRUE      12.096004               1
7            TRUE      11.340604               1
8            TRUE       9.739574               1
9            TRUE       7.383535            <NA>
10           TRUE       7.356584               1
11          FALSE      74.702715               1
12          FALSE      54.497229             0;4
13          FALSE      27.376796               0
14          FALSE      26.234789               0
15          FALSE      12.081466               0
16          FALSE       9.460175            <NA>
17          FALSE       8.824036               1
18          FALSE       8.418049            <NA>

Show code

if (nrow(priority_micro_lncRNAs) > 0) {
  driver_lncRNAs <- priority_micro_lncRNAs %>%
    filter(
      same_direction,
      avg_log2FC_dam_vs_homeostatic > 0,
      avg_log2FC_PD_vs_C > 0
    )

  defender_lncRNAs <- priority_micro_lncRNAs %>%
    filter(
      same_direction,
      avg_log2FC_dam_vs_homeostatic < 0,
      avg_log2FC_PD_vs_C < 0
    )

  print(driver_lncRNAs)
  print(defender_lncRNAs)
} else {
  driver_lncRNAs <- data.frame()
  defender_lncRNAs <- data.frame()
}

             gene gene_symbol_for_plot    gencode_gene_id gencode_gene_name
1        MIR223HG             MIR223HG ENSG00000274536.10          MIR223HG
2         FCGR1BP              FCGR1BP  ENSG00000291135.2           FCGR1BP
3 ENSG00000290735      ENSG00000290735  ENSG00000290735.2   ENSG00000290735
4       LINC02397            LINC02397  ENSG00000205056.9         LINC02397
5 ENSG00000308579      ENSG00000308579  ENSG00000308579.1   ENSG00000308579
6          SNHG15               SNHG15 ENSG00000232956.11            SNHG15
        lncRNA_type avg_log2FC_dam_vs_homeostatic p_val_adj_dam_vs_homeostatic
1              LINC                     0.3735245                 0.0045876930
2              LINC                     0.6726387                 0.0001307758
3 Sense_overlapping                     1.0989958                 0.0004037941
4              LINC                     1.1166999                 0.0022463767
5              LINC                     1.1730522                 0.0175075936
6              LINC                     1.1985005                 0.0003286326
  pct.1_dam pct.2_homeostatic avg_log2FC_PD_vs_C p_val_adj_PD_vs_C pct.1_PD
1     0.280             0.185           1.096231      1.622925e-11    0.273
2     0.160             0.083           1.125438      3.850750e-07    0.144
3     0.094             0.039           1.489717      4.384791e-06    0.083
4     0.077             0.030           1.496335      3.326432e-05    0.066
5     0.068             0.027           1.317259      7.303820e-04    0.065
6     0.071             0.024           1.128459      2.842268e-02    0.062
  pct.2_C same_direction priority_score cluster_support max_cluster_log2FC
1   0.165           TRUE      14.597863               1          0.5672749
2   0.073           TRUE      12.096004               1          0.9150627
3   0.032           TRUE      11.340604               1          1.2543783
4   0.023           TRUE       9.739574               1          1.4345364
5   0.025           TRUE       7.383535            <NA>                 NA
6   0.027           TRUE       7.356584               1          1.2549421
  min_cluster_p_val_adj
1          1.528116e-05
2          8.463843e-08
3          7.507519e-07
4          4.081654e-07
5                    NA
6          8.254791e-04
             gene gene_symbol_for_plot    gencode_gene_id gencode_gene_name
1           DLEU1                DLEU1 ENSG00000176124.16             DLEU1
2 ENSG00000287616      ENSG00000287616  ENSG00000287616.2   ENSG00000287616
3       LINC02712            LINC02712  ENSG00000273409.4         LINC02712
4            XACT                 XACT  ENSG00000241743.5              XACT
  lncRNA_type avg_log2FC_dam_vs_homeostatic p_val_adj_dam_vs_homeostatic
1   Antisense                    -0.6635837                 1.847872e-10
2        LINC                    -0.8497620                 2.345796e-03
3        LINC                    -1.5901997                 1.455275e-07
4        LINC                    -1.6214382                 4.110563e-06
  pct.1_dam pct.2_homeostatic avg_log2FC_PD_vs_C p_val_adj_PD_vs_C pct.1_PD
1     0.698             0.692         -0.6094611      1.680843e-17    0.640
2     0.303             0.369         -1.1227391      3.771405e-21    0.257
3     0.123             0.214         -1.6596257      1.126833e-14    0.106
4     0.160             0.246         -0.9300041      8.087780e-10    0.151
  pct.2_C same_direction priority_score cluster_support max_cluster_log2FC
1   0.732           TRUE       27.78085               0          0.6750710
2   0.406           TRUE       25.02571               0          0.8819311
3   0.212           TRUE       24.03502               0          1.7319764
4   0.252           TRUE       17.02971               0          1.3910430
  min_cluster_p_val_adj
1          1.580486e-16
2          1.835708e-05
3          3.226066e-13
4          5.826077e-07

9 Step 14.9: Visualize Top lncRNA Candidates

We visualize the top shared candidates when they exist.

If no shared candidates are found, we visualize the top candidates from the DAM state comparison, or the PD vs C comparison.

Show code

if (nrow(priority_micro_lncRNAs) > 0) {
  top_lncRNAs_for_plot <- priority_micro_lncRNAs %>%
    slice_head(n = 12) %>%
    pull(gene)
} else {
  top_lncRNAs_for_plot <- c()
}

if (length(top_lncRNAs_for_plot) == 0 && nrow(dam_lncRNA_candidates) > 0) {
  top_lncRNAs_for_plot <- dam_lncRNA_candidates %>%
    slice_head(n = 12) %>%
    pull(gene)
}

if (length(top_lncRNAs_for_plot) == 0 && nrow(PD_lncRNA_candidates) > 0) {
  top_lncRNAs_for_plot <- PD_lncRNA_candidates %>%
    slice_head(n = 12) %>%
    pull(gene)
}

top_lncRNAs_for_plot <- intersect(top_lncRNAs_for_plot, rownames(SO_micro))

top_lncRNAs_for_plot

 [1] "DLEU1"           "ENSG00000287616" "LINC02712"       "XACT"           
 [5] "MIR223HG"        "FCGR1BP"         "ENSG00000290735" "LINC02397"      
 [9] "ENSG00000308579" "SNHG15"          "ADAM7-AS1"       "NEAT1"

Show code

if (length(top_lncRNAs_for_plot) > 0) {
  FeaturePlot(
    SO_micro,
    features = top_lncRNAs_for_plot,
    reduction = "umap_micro_harmony",
    ncol = 4,
    pt.size = 0.5
  )
} else {
  message("No lncRNA candidates available for FeaturePlot.")
}

Show code

if (length(top_lncRNAs_for_plot) > 0) {
  DotPlot(
    SO_micro,
    features = top_lncRNAs_for_plot,
    group.by = "micro_state_refined"
  ) +
    RotatedAxis() +
    labs(title = "Top lncRNA Candidates by Refined Microglia State")
} else {
  message("No lncRNA candidates available for DotPlot.")
}

Show code

if (length(top_lncRNAs_for_plot) > 0) {
  DotPlot(
    SO_micro,
    features = top_lncRNAs_for_plot,
    group.by = "condition"
  ) +
    RotatedAxis() +
    labs(title = "Top lncRNA Candidates by Condition")
} else {
  message("No lncRNA candidates available for DotPlot.")
}

10 Step 14.10: Save Microglia DE and lncRNA Discovery Results

Show code

saveRDS(
  SO_micro,
  file.path(results_dir, "SO_micro_step14_lncRNA_analysis.rds")
)

saveRDS(
  SO_micro_main_DE,
  file.path(results_dir, "SO_micro_step14_main_DE_subset.rds")
)

write.csv(
  micro_state_cluster_summary,
  file.path(results_dir, "Step14_micro_state_cluster_summary.csv"),
  row.names = FALSE
)

write.csv(
  micro_state_group_summary,
  file.path(results_dir, "Step14_micro_state_group_summary.csv"),
  row.names = FALSE
)

if (nrow(micro_dam_vs_homeostatic_annotated) > 0) {
  write.csv(
    micro_dam_vs_homeostatic_annotated,
    file.path(results_dir, "Step14_DE_DAM_vs_homeostatic.csv"),
    row.names = FALSE
  )
}

write.csv(
  micro_PD_vs_C_annotated,
  file.path(results_dir, "Step14_DE_PD_vs_C_microglia.csv"),
  row.names = FALSE
)

write.csv(
  micro_cluster_markers_annotated,
  file.path(results_dir, "Step14_micro_cluster_markers_annotated.csv"),
  row.names = FALSE
)

if (nrow(dam_lncRNA_candidates) > 0) {
  write.csv(
    dam_lncRNA_candidates,
    file.path(results_dir, "Step14_DAM_lncRNA_candidates.csv"),
    row.names = FALSE
  )
}

write.csv(
  PD_lncRNA_candidates,
  file.path(results_dir, "Step14_PD_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  cluster_lncRNA_candidates,
  file.path(results_dir, "Step14_cluster_lncRNA_candidates.csv"),
  row.names = FALSE
)

if (nrow(priority_micro_lncRNAs) > 0) {
  write.csv(
    priority_micro_lncRNAs,
    file.path(results_dir, "Step14_priority_micro_lncRNA_candidates.csv"),
    row.names = FALSE
  )
  
  write.csv(
    driver_lncRNAs,
    file.path(results_dir, "Step14_driver_like_lncRNA_candidates.csv"),
    row.names = FALSE
  )
  
  write.csv(
    defender_lncRNAs,
    file.path(results_dir, "Step14_defender_like_lncRNA_candidates.csv"),
    row.names = FALSE
  )
}

write.csv(
  data.frame(
    setting = c(
      "input_object",
      "state_definition",
      "dam_clusters",
      "homeostatic_clusters",
      "other_excluded_from_main_DE_clusters",
      "combined_activation_score_usage",
      "min_pct_use",
      "logfc_threshold_use",
      "p_adj_cutoff",
      "lncRNA_logfc_cutoff",
      "n_main_DE_cells",
      "n_DAM_lncRNA_candidates",
      "n_PD_lncRNA_candidates",
      "n_cluster_lncRNA_candidates",
      "n_priority_lncRNAs",
      "n_driver_like_lncRNAs",
      "n_defender_like_lncRNAs"
    ),
    value = c(
      input_path,
      "cluster-based refined microglia states",
      paste(dam_clusters, collapse = ","),
      paste(homeostatic_clusters, collapse = ","),
      paste(other_clusters, collapse = ","),
      "z-scored summary/visualization only; not used for main DE grouping",
      min_pct_use,
      logfc_threshold_use,
      p_adj_cutoff,
      lncRNA_logfc_cutoff,
      ncol(SO_micro_main_DE),
      if(exists("dam_lncRNA_candidates")) nrow(dam_lncRNA_candidates) else 0,
      nrow(PD_lncRNA_candidates),
      nrow(cluster_lncRNA_candidates),
      if(exists("priority_micro_lncRNAs")) nrow(priority_micro_lncRNAs) else 0,
      if(exists("driver_lncRNAs")) nrow(driver_lncRNAs) else 0,
      if(exists("defender_lncRNAs")) nrow(defender_lncRNAs) else 0
    )
  ),
  file.path(results_dir, "Step14_analysis_settings.csv"),
  row.names = FALSE
)

11 Interpretation Note

This analysis prioritizes lncRNAs that are associated with microglia state and/or PD status.

The strongest candidates are not simply the most significant DE genes. They are candidates that satisfy several criteria:

1. Context-based lncRNA annotation from Step 13 using GENCODE v49
2. Differential expression in cluster-defined DAM microglia
3. Differential expression in PD microglia
4. Optional support from microglia subcluster markers
5. Interpretable direction of change

The final candidate list should be treated as a hypothesis-generating result. Any top lncRNA should be checked manually using genome annotation databases and literature before being described as novel or biologically meaningful.