Step 12.5: Fernando Object-Based Astrocyte lncRNA Analysis

Cluster-based differential expression and lncRNA candidate discovery from Fernando-defined astrocyte states

1 Setup: Environment and Data

This file starts from the astrocyte object generated in Step 11.5.

The input object should already contain:

Fernando-defined astrocyte cells
astrocyte subclusters
reactive / inflammatory / stress / ECM module scores
condition information

The main biological goal is to identify lncRNAs associated with a cluster-defined astrocyte state transition:

Homeostatic-like astrocytes
→ reactive / stress-like PD-enriched astrocytes

This is an astrocyte state analysis, not a broad cell-type discovery analysis.

Show code
library(Seurat)
library(tidyverse)
library(patchwork)

set.seed(1234)

project_dir <- "/Users/yoshimurasouhei/Downloads/010_school/4年生/bioinfomaticsリサーチクラークシップ/PD_2026"

input_path <- file.path(
  project_dir,
  "results",
  "Step11_5_fernando_astrocyte_harmony_reclustering",
  "SO_astro_step11_5_fernando_harmony_reclustered_scored.rds"
)

results_dir <- file.path(
  project_dir,
  "results",
  "Step12_5_fernando_astrocyte_lncRNA_analysis"
)

dir.create(results_dir, recursive = TRUE, showWarnings = FALSE)

SO_astro <- readRDS(input_path)

DefaultAssay(SO_astro) <- "RNA"

if ("Assay5" %in% class(SO_astro[["RNA"]])) {
  SO_astro <- JoinLayers(SO_astro, assay = "RNA")
}

SO_astro
An object of class Seurat 
87674 features across 5259 samples within 2 assays 
Active assay: RNA (47773 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 1 other assay present: SCT
 6 dimensional reductions calculated: pca, umap, harmony, pca_astro, harmony_astro, umap_astro_harmony
Show code
required_cols <- c(
  "seurat_clusters_harmony",
  "condition",
  "sample",
  "sex",
  "Astro_identity_score1",
  "Reactive_astro_score1",
  "Inflammatory_score1",
  "Stress_score1",
  "ECM_score1",
  "umap_astro_harmony"
)

missing_cols <- setdiff(required_cols, c(colnames(SO_astro@meta.data), Reductions(SO_astro)))

if (length(missing_cols) > 0) {
  stop(
    paste(
      "Missing required Step 11.5 outputs:",
      paste(missing_cols, collapse = ", ")
    )
  )
}

table(SO_astro$seurat_clusters_harmony)

   0    1    2    3    4    5    6 
1602 1584 1256  395  265  102   55 
Show code
table(SO_astro$condition)

   C   PD 
2336 2923 
Show code
stopifnot("umap_astro_harmony" %in% Reductions(SO_astro))
stopifnot("seurat_clusters_harmony" %in% colnames(SO_astro@meta.data))
stopifnot(!any(is.na(SO_astro$seurat_clusters_harmony)))

2 Step 12.1: Refine Astrocyte State Annotation

Here, the main state definition is cluster-based, not quartile-based.

Based on Step 11.5 cluster summaries, the main groups are:

Reactive-like PD-enriched:
  cluster 0

Homeostatic-like:
  cluster 2

Excluded from main binary DE:
  clusters 1, 3, 4, 5, 6

Clusters 1, 3, 4, 5, and 6 are excluded from the main binary DE comparison because they represent background-stress-like or minor/intermediate states rather than the two extreme states used for candidate prioritization.

The combined reactivity score is calculated only for visualization and summary. It is not used to define the main DE groups.

Show code
SO_astro$astro_cluster <- as.character(SO_astro$seurat_clusters_harmony)

reactive_stress_clusters <- c("0")
homeostatic_clusters <- c("2")
background_stress_clusters <- c("1")
minor_or_intermediate_clusters <- c("3", "4", "5", "6")

SO_astro$astro_state_refined <- case_when(
  SO_astro$astro_cluster %in% reactive_stress_clusters ~ "PD_enriched_reactive_ECM_like",
  SO_astro$astro_cluster %in% homeostatic_clusters ~ "Homeostatic_like",
  SO_astro$astro_cluster %in% background_stress_clusters ~ "Background_stress_like",
  SO_astro$astro_cluster %in% minor_or_intermediate_clusters ~ "Minor_or_intermediate",
  TRUE ~ "Unassigned"
)

SO_astro$astro_state_refined <- factor(
  SO_astro$astro_state_refined,
  levels = c(
    "PD_enriched_reactive_ECM_like",
    "Background_stress_like",
    "Homeostatic_like",
    "Minor_or_intermediate",
    "Unassigned"
  )
)

table(SO_astro$astro_cluster, SO_astro$astro_state_refined)
   
    PD_enriched_reactive_ECM_like Background_stress_like Homeostatic_like
  0                          1602                      0                0
  1                             0                   1584                0
  2                             0                      0             1256
  3                             0                      0                0
  4                             0                      0                0
  5                             0                      0                0
  6                             0                      0                0
   
    Minor_or_intermediate Unassigned
  0                     0          0
  1                     0          0
  2                     0          0
  3                   395          0
  4                   265          0
  5                   102          0
  6                    55          0
Show code
table(SO_astro$astro_state_refined, SO_astro$condition)
                               
                                   C   PD
  PD_enriched_reactive_ECM_like  468 1134
  Background_stress_like         929  655
  Homeostatic_like               627  629
  Minor_or_intermediate          312  505
  Unassigned                       0    0
Show code
reactivity_score_matrix <- SO_astro@meta.data[, c(
  "Reactive_astro_score1",
  "Inflammatory_score1",
  "Stress_score1",
  "ECM_score1"
)]

SO_astro$combined_reactivity_score_z <- rowMeans(
  scale(reactivity_score_matrix),
  na.rm = TRUE
)

summary(SO_astro$combined_reactivity_score_z)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-1.2683 -0.5273 -0.1045  0.0000  0.4100  3.7505 
Show code
astro_state_cluster_summary <- SO_astro@meta.data %>%
  mutate(
    astro_cluster = as.character(seurat_clusters_harmony),
    astro_state_refined = as.character(astro_state_refined)
  ) %>%
  group_by(astro_cluster, astro_state_refined) %>%
  summarise(
    n_cells = n(),
    mean_astro_identity = mean(Astro_identity_score1, na.rm = TRUE),
    mean_combined_reactivity_z = mean(combined_reactivity_score_z, na.rm = TRUE),
    mean_reactive = mean(Reactive_astro_score1, na.rm = TRUE),
    mean_inflammatory = mean(Inflammatory_score1, na.rm = TRUE),
    mean_stress = mean(Stress_score1, na.rm = TRUE),
    mean_ecm = mean(ECM_score1, na.rm = TRUE),
    percent_PD = mean(condition == "PD", na.rm = TRUE),
    percent_C = mean(condition == "C", na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_combined_reactivity_z))

astro_state_cluster_summary
Show code
astro_state_group_summary <- SO_astro@meta.data %>%
  mutate(
    astro_cluster = as.character(seurat_clusters_harmony),
    astro_state_refined = as.character(astro_state_refined)
  ) %>%
  group_by(astro_state_refined) %>%
  summarise(
    n_cells = n(),
    mean_astro_identity = mean(Astro_identity_score1, na.rm = TRUE),
    mean_combined_reactivity_z = mean(combined_reactivity_score_z, na.rm = TRUE),
    mean_reactive = mean(Reactive_astro_score1, na.rm = TRUE),
    mean_inflammatory = mean(Inflammatory_score1, na.rm = TRUE),
    mean_stress = mean(Stress_score1, na.rm = TRUE),
    mean_ecm = mean(ECM_score1, na.rm = TRUE),
    percent_PD = mean(condition == "PD", na.rm = TRUE),
    percent_C = mean(condition == "C", na.rm = TRUE),
    clusters_included = paste(sort(unique(astro_cluster)), collapse = ", "),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_combined_reactivity_z))

astro_state_group_summary
Show code
p_state <- DimPlot(
  SO_astro,
  reduction = "umap_astro_harmony",
  group.by = "astro_state_refined",
  label = TRUE,
  repel = TRUE,
  pt.size = 0.5
) +
  labs(title = "Cluster-Based Refined Astrocyte State Annotation")

p_score <- FeaturePlot(
  SO_astro,
  features = "combined_reactivity_score_z",
  reduction = "umap_astro_harmony",
  pt.size = 0.5
) +
  labs(title = "Z-scored Combined Astrocyte Reactivity Score")

p_state | p_score

Show code
SO_astro$astro_DE_group <- case_when(
  SO_astro$astro_state_refined == "PD_enriched_reactive_ECM_like" ~ "Reactive_like_PD_enriched",
  SO_astro$astro_state_refined == "Homeostatic_like" ~ "Homeostatic_like",
  TRUE ~ NA_character_
)

SO_astro$astro_DE_group <- factor(
  SO_astro$astro_DE_group,
  levels = c("Homeostatic_like", "Reactive_like_PD_enriched")
)

table(SO_astro$astro_DE_group, useNA = "ifany")

         Homeostatic_like Reactive_like_PD_enriched                      <NA> 
                     1256                      1602                      2401 
Show code
table(SO_astro$astro_DE_group, SO_astro$condition, useNA = "ifany")
                           
                               C   PD
  Homeostatic_like           627  629
  Reactive_like_PD_enriched  468 1134
  <NA>                      1241 1160
Show code
write.csv(
  astro_state_cluster_summary,
  file.path(results_dir, "Step12_1_refined_astrocyte_state_cluster_summary.csv"),
  row.names = FALSE
)

write.csv(
  astro_state_group_summary,
  file.path(results_dir, "Step12_1_refined_astrocyte_state_group_summary.csv"),
  row.names = FALSE
)

write.csv(
  as.data.frame.matrix(table(SO_astro$astro_state_refined, SO_astro$condition)),
  file.path(results_dir, "Step12_1_refined_astrocyte_state_condition_table.csv")
)

write.csv(
  as.data.frame.matrix(table(SO_astro$astro_cluster, SO_astro$astro_state_refined)),
  file.path(results_dir, "Step12_1_cluster_to_refined_state_table.csv")
)

3 Step 12.2: Define Comparison Groups for Differential Expression

We define two main comparisons:

1. Reactive_like_PD_enriched vs Homeostatic_like
2. PD vs C within Fernando-defined astrocytes

The first comparison prioritizes cluster-defined astrocyte state-associated genes.
The second comparison prioritizes disease-associated genes across all Fernando-defined astrocytes.

Show code
SO_astro_main_DE <- subset(
  SO_astro,
  subset = !is.na(astro_DE_group)
)

SO_astro_main_DE$astro_DE_group <- droplevels(SO_astro_main_DE$astro_DE_group)

table(SO_astro_main_DE$astro_DE_group)

         Homeostatic_like Reactive_like_PD_enriched 
                     1256                      1602 
Show code
table(SO_astro_main_DE$astro_DE_group, SO_astro_main_DE$condition)
                           
                               C   PD
  Homeostatic_like           627  629
  Reactive_like_PD_enriched  468 1134
Show code
min_pct_use <- 0.05
logfc_threshold_use <- 0.10
p_adj_cutoff <- 0.05
lncRNA_logfc_cutoff <- 0.25

min_pct_use
[1] 0.05
Show code
logfc_threshold_use
[1] 0.1
Show code
p_adj_cutoff
[1] 0.05
Show code
lncRNA_logfc_cutoff
[1] 0.25

4 Step 12.3: Differential Expression Analysis

We run DE for:

Reactive_like_PD_enriched vs Homeostatic_like
PD vs C
Show code
Idents(SO_astro_main_DE) <- SO_astro_main_DE$astro_DE_group

astro_reactive_vs_homeostatic_DE <- FindMarkers(
  SO_astro_main_DE,
  ident.1 = "Reactive_like_PD_enriched",
  ident.2 = "Homeostatic_like",
  min.pct = min_pct_use,
  logfc.threshold = logfc_threshold_use,
  test.use = "wilcox"
) %>%
  rownames_to_column("gene") %>%
  arrange(p_val_adj, desc(avg_log2FC))

astro_reactive_vs_homeostatic_DE %>%
  slice_head(n = 20)
Show code
Idents(SO_astro) <- SO_astro$condition

astro_PD_vs_C_DE <- FindMarkers(
  SO_astro,
  ident.1 = "PD",
  ident.2 = "C",
  min.pct = min_pct_use,
  logfc.threshold = logfc_threshold_use,
  test.use = "wilcox"
) %>%
  rownames_to_column("gene") %>%
  arrange(p_val_adj, desc(avg_log2FC))

astro_PD_vs_C_DE %>% head(20)

5 Step 12.4: Cluster-Specific DE Analysis

Here we extract markers for each astrocyte subcluster.

These are subcluster/state markers, not broad astrocyte identity markers.

Show code
Idents(SO_astro) <- SO_astro$seurat_clusters_harmony

astro_cluster_markers <- FindAllMarkers(
  SO_astro,
  only.pos = TRUE,
  min.pct = min_pct_use,
  logfc.threshold = logfc_threshold_use,
  test.use = "wilcox"
) %>%
  arrange(cluster, p_val_adj, desc(avg_log2FC))

astro_cluster_markers %>% head(30)
Show code
top_astro_cluster_markers <- astro_cluster_markers %>%
  group_by(cluster) %>%
  slice_min(order_by = p_val_adj, n = 20, with_ties = FALSE) %>%
  ungroup()

top_astro_cluster_markers

6 Step 12.5 / 12.6

Show code
lncRNA_annotation_path <- file.path(
  project_dir,
  "results",
  "Step11_5_fernando_astrocyte_harmony_reclustering",
  "Step11_5_astrocyte_lncRNA_gene_annotation_context_based.csv"
)

if (!file.exists(lncRNA_annotation_path)) {
  stop(
    paste(
      "Step 11.5 context-based lncRNA annotation file not found:",
      lncRNA_annotation_path
    )
  )
}

lncRNA_gene_annotation <- read.csv(
  lncRNA_annotation_path,
  stringsAsFactors = FALSE
) %>%
  as_tibble()

required_lncRNA_anno_cols <- c(
  "gene",
  "gencode_gene_id",
  "gencode_gene_name",
  "lncRNA_type",
  "is_lncRNA"
)

missing_lncRNA_anno_cols <- setdiff(
  required_lncRNA_anno_cols,
  colnames(lncRNA_gene_annotation)
)

if (length(missing_lncRNA_anno_cols) > 0) {
  stop(
    paste(
      "Missing columns in Step 11.5 lncRNA annotation:",
      paste(missing_lncRNA_anno_cols, collapse = ", ")
    )
  )
}

lncRNA_gene_annotation <- lncRNA_gene_annotation %>%
  dplyr::select(
    gene,
    gencode_gene_id,
    gencode_gene_name,
    lncRNA_type,
    is_lncRNA
  ) %>%
  distinct(gene, .keep_all = TRUE)

lncRNA_gene_annotation %>%
  count(lncRNA_type, name = "n_genes") %>%
  arrange(desc(n_genes))
Show code
annotate_de_table_with_lncRNA_context <- function(de_table) {
  de_table %>%
    left_join(
      lncRNA_gene_annotation,
      by = "gene"
    ) %>%
    mutate(
      is_lncRNA = if_else(is.na(is_lncRNA), FALSE, is_lncRNA),
      lncRNA_type = if_else(
        is.na(lncRNA_type),
        "Not_lncRNA_or_unmatched",
        lncRNA_type
      ),
      gene_symbol_for_plot = case_when(
        !is.na(gencode_gene_name) ~ gencode_gene_name,
        TRUE ~ gene
      )
    )
}

astro_reactive_vs_homeostatic_annotated <- annotate_de_table_with_lncRNA_context(
  astro_reactive_vs_homeostatic_DE
)

astro_PD_vs_C_annotated <- annotate_de_table_with_lncRNA_context(
  astro_PD_vs_C_DE
)

astro_cluster_markers_annotated <- annotate_de_table_with_lncRNA_context(
  astro_cluster_markers
)

7 Step 12.7: Extract Annotated lncRNA Candidates

We extract lncRNA-like candidates from each comparison.

Show code
reactive_lncRNA_candidates <- astro_reactive_vs_homeostatic_annotated %>%
  filter(
    is_lncRNA,
    p_val_adj < p_adj_cutoff,
    abs(avg_log2FC) >= lncRNA_logfc_cutoff
  ) %>%
  arrange(p_val_adj, desc(abs(avg_log2FC)))

PD_lncRNA_candidates <- astro_PD_vs_C_annotated %>%
  filter(
    is_lncRNA,
    p_val_adj < p_adj_cutoff,
    abs(avg_log2FC) >= lncRNA_logfc_cutoff
  ) %>%
  arrange(p_val_adj, desc(abs(avg_log2FC)))

cluster_lncRNA_candidates <- astro_cluster_markers_annotated %>%
  filter(
    is_lncRNA,
    p_val_adj < p_adj_cutoff,
    abs(avg_log2FC) >= lncRNA_logfc_cutoff
  ) %>%
  arrange(cluster, p_val_adj, desc(avg_log2FC))

reactive_lncRNA_candidates %>%
  dplyr::select(gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  slice_head(n = 30)
Show code
PD_lncRNA_candidates %>%
  dplyr::select(gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  slice_head(n = 30)
Show code
cluster_lncRNA_candidates %>%
  dplyr::select(cluster, gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  slice_head(n = 30)

8 Step 12.8: Prioritize lncRNAs Shared Across Comparisons

The strongest candidates are lncRNAs associated with both the reactive/stress-like astrocyte state and PD status.

Show code
priority_astro_lncRNAs <- reactive_lncRNA_candidates %>%
  dplyr::select(
    gene,
    gene_symbol_for_plot,
    gencode_gene_id,
    gencode_gene_name,
    lncRNA_type,
    avg_log2FC_reactive_vs_homeostatic = avg_log2FC,
    p_val_adj_reactive_vs_homeostatic = p_val_adj,
    pct.1_reactive = pct.1,
    pct.2_homeostatic = pct.2
  ) %>%
  inner_join(
    PD_lncRNA_candidates %>%
      dplyr::select(
        gene,
        avg_log2FC_PD_vs_C = avg_log2FC,
        p_val_adj_PD_vs_C = p_val_adj,
        pct.1_PD = pct.1,
        pct.2_C = pct.2
      ),
    by = "gene"
  ) %>%
  mutate(
    same_direction = sign(avg_log2FC_reactive_vs_homeostatic) == sign(avg_log2FC_PD_vs_C),
    priority_score =
      -log10(p_val_adj_reactive_vs_homeostatic + 1e-300) +
      -log10(p_val_adj_PD_vs_C + 1e-300) +
      abs(avg_log2FC_reactive_vs_homeostatic) +
      abs(avg_log2FC_PD_vs_C)
  ) %>%
  arrange(desc(same_direction), desc(priority_score))

priority_astro_lncRNAs
Show code
cluster_lncRNA_support <- cluster_lncRNA_candidates %>%
  group_by(gene) %>%
  summarise(
    cluster_support = paste(sort(unique(cluster)), collapse = ";"),
    max_cluster_log2FC = max(avg_log2FC, na.rm = TRUE),
    min_cluster_p_val_adj = min(p_val_adj, na.rm = TRUE),
    .groups = "drop"
  )

priority_astro_lncRNAs <- priority_astro_lncRNAs %>%
  left_join(cluster_lncRNA_support, by = "gene") %>%
  arrange(desc(same_direction), desc(priority_score))

priority_astro_lncRNAs %>%
  dplyr::select(
    gene,
    gene_symbol_for_plot,
    avg_log2FC_reactive_vs_homeostatic,
    avg_log2FC_PD_vs_C,
    p_val_adj_reactive_vs_homeostatic,
    p_val_adj_PD_vs_C,
    same_direction,
    priority_score,
    cluster_support
  ) %>%
  slice_head(n = 30)
Show code
driver_lncRNAs <- priority_astro_lncRNAs %>%
  filter(
    same_direction,
    avg_log2FC_reactive_vs_homeostatic > 0,
    avg_log2FC_PD_vs_C > 0
  )

defender_lncRNAs <- priority_astro_lncRNAs %>%
  filter(
    same_direction,
    avg_log2FC_reactive_vs_homeostatic < 0,
    avg_log2FC_PD_vs_C < 0
  )

driver_lncRNAs
Show code
defender_lncRNAs

9 Step 12.9: Visualize Top lncRNA Candidates

We visualize the top shared candidates when they exist.

If no shared candidates are found, we visualize the top candidates from the reactive/stress-like state comparison.

Show code
top_lncRNAs_for_plot <- priority_astro_lncRNAs %>%
  slice_head(n = 12) %>%
  pull(gene)

if (length(top_lncRNAs_for_plot) == 0) {
  top_lncRNAs_for_plot <- reactive_lncRNA_candidates %>%
    slice_head(n = 12) %>%
    pull(gene)
}

top_lncRNAs_for_plot <- intersect(top_lncRNAs_for_plot, rownames(SO_astro))

top_lncRNAs_for_plot
 [1] "MALAT1"          "ENSG00000299261" "OBI1-AS1"        "ENSG00000233942"
 [5] "TALAM1"          "LINC00499"       "ENSG00000228408" "MIR9-1HG"       
 [9] "ENSG00000287544" "PCDH9-AS2"       "POT1-AS1"        "SLC44A3-AS1"    
Show code
if (length(top_lncRNAs_for_plot) > 0) {
  FeaturePlot(
    SO_astro,
    features = top_lncRNAs_for_plot,
    reduction = "umap_astro_harmony",
    ncol = 4,
    pt.size = 0.5
  )
} else {
  message("No lncRNA candidates available for FeaturePlot.")
}

Show code
if (length(top_lncRNAs_for_plot) > 0) {
  DotPlot(
    SO_astro,
    features = top_lncRNAs_for_plot,
    group.by = "astro_state_refined"
  ) +
    RotatedAxis() +
    labs(title = "Top lncRNA Candidates by Refined Astrocyte State")
} else {
  message("No lncRNA candidates available for DotPlot.")
}

Show code
if (length(top_lncRNAs_for_plot) > 0) {
  DotPlot(
    SO_astro,
    features = top_lncRNAs_for_plot,
    group.by = "condition"
  ) +
    RotatedAxis() +
    labs(title = "Top lncRNA Candidates by Condition")
} else {
  message("No lncRNA candidates available for DotPlot.")
}

10 Step 12.10: Save Astrocyte DE and lncRNA Discovery Results

Show code
saveRDS(
  SO_astro,
  file.path(results_dir, "SO_astro_step12_5_fernando_lncRNA_analysis.rds")
)

saveRDS(
  SO_astro_main_DE,
  file.path(results_dir, "SO_astro_step12_5_main_DE_subset.rds")
)

write.csv(
  astro_state_cluster_summary,
  file.path(results_dir, "Step12_5_astro_state_cluster_summary.csv"),
  row.names = FALSE
)

write.csv(
  astro_state_group_summary,
  file.path(results_dir, "Step12_5_astro_state_group_summary.csv"),
  row.names = FALSE
)

write.csv(
  astro_reactive_vs_homeostatic_annotated,
  file.path(results_dir, "Step12_5_DE_reactive_like_PD_enriched_vs_homeostatic_like.csv"),
  row.names = FALSE
)

write.csv(
  astro_PD_vs_C_annotated,
  file.path(results_dir, "Step12_5_DE_PD_vs_C_astrocytes.csv"),
  row.names = FALSE
)

write.csv(
  astro_cluster_markers_annotated,
  file.path(results_dir, "Step12_5_astro_cluster_markers_annotated.csv"),
  row.names = FALSE
)

write.csv(
  reactive_lncRNA_candidates,
  file.path(results_dir, "Step12_5_reactive_like_PD_enriched_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  PD_lncRNA_candidates,
  file.path(results_dir, "Step12_5_PD_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  cluster_lncRNA_candidates,
  file.path(results_dir, "Step12_5_cluster_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  priority_astro_lncRNAs,
  file.path(results_dir, "Step12_5_priority_astro_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  driver_lncRNAs,
  file.path(results_dir, "Step12_5_driver_like_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  defender_lncRNAs,
  file.path(results_dir, "Step12_5_defender_like_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  data.frame(
    setting = c(
      "input_object",
      "state_definition",
      "reactive_like_PD_enriched_clusters",
      "homeostatic_like_clusters",
      "excluded_from_main_DE_clusters",
      "combined_reactivity_score_usage",
      "min_pct_use",
      "logfc_threshold_use",
      "p_adj_cutoff",
      "lncRNA_logfc_cutoff",
      "n_main_DE_cells",
      "n_reactive_lncRNA_candidates",
      "n_PD_lncRNA_candidates",
      "n_cluster_lncRNA_candidates",
      "n_priority_lncRNAs",
      "n_driver_like_lncRNAs",
      "n_defender_like_lncRNAs"
    ),
    value = c(
      input_path,
      "cluster-based refined astrocyte states",
      paste(reactive_stress_clusters, collapse = ","),
      paste(homeostatic_clusters, collapse = ","),
      paste(c(background_stress_clusters, minor_or_intermediate_clusters), collapse = ","),
      "z-scored summary/visualization only; not used for main DE grouping",
      min_pct_use,
      logfc_threshold_use,
      p_adj_cutoff,
      lncRNA_logfc_cutoff,
      ncol(SO_astro_main_DE),
      nrow(reactive_lncRNA_candidates),
      nrow(PD_lncRNA_candidates),
      nrow(cluster_lncRNA_candidates),
      nrow(priority_astro_lncRNAs),
      nrow(driver_lncRNAs),
      nrow(defender_lncRNAs)
    )
  ),
  file.path(results_dir, "Step12_5_analysis_settings.csv"),
  row.names = FALSE
)

11 Interpretation Note

This analysis prioritizes lncRNAs that are associated with astrocyte state and/or PD status.

The strongest candidates are not simply the most significant DE genes. They are candidates that satisfy several criteria:

1. Context-based lncRNA annotation from Step 11.5 using GENCODE v49
2. Differential expression in cluster-defined reactive-like PD-enriched astrocytes
3. Differential expression in PD astrocytes
4. Optional support from astrocyte subcluster markers
5. Interpretable direction of change

The final candidate list should be treated as a hypothesis-generating result. Any top lncRNA should be checked manually using genome annotation databases and literature before being described as novel or biologically meaningful.