Step 12.5: Fernando Object-Based Astrocyte lncRNA Analysis

Cluster-based differential expression and lncRNA candidate discovery from Fernando-defined astrocyte states

1 Setup: Environment and Data

This file starts from the astrocyte object generated in Step 11.5.

The input object should already contain:

Fernando-defined astrocyte cells
astrocyte subclusters
reactive / inflammatory / stress / ECM module scores
condition information

The main biological goal is to identify lncRNAs associated with a cluster-defined astrocyte state transition:

Homeostatic-like astrocytes
→ reactive / stress-like PD-enriched astrocytes

This is an astrocyte state analysis, not a broad cell-type discovery analysis.

Show code

library(Seurat)
library(tidyverse)
library(patchwork)

set.seed(1234)

project_dir <- "/Users/yoshimurasouhei/Downloads/010_school/4年生/bioinfomaticsリサーチクラークシップ/PD_2026"

input_path <- file.path(
  project_dir,
  "results",
  "Step11_5_fernando_astrocyte_harmony_reclustering",
  "SO_astro_step11_5_fernando_harmony_reclustered_scored.rds"
)

results_dir <- file.path(
  project_dir,
  "results",
  "Step12_5_fernando_astrocyte_lncRNA_analysis"
)

dir.create(results_dir, recursive = TRUE, showWarnings = FALSE)

SO_astro <- readRDS(input_path)

DefaultAssay(SO_astro) <- "RNA"

if ("Assay5" %in% class(SO_astro[["RNA"]])) {
  SO_astro <- JoinLayers(SO_astro, assay = "RNA")
}

SO_astro

An object of class Seurat 
87674 features across 5259 samples within 2 assays 
Active assay: RNA (47773 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 1 other assay present: SCT
 6 dimensional reductions calculated: pca, umap, harmony, pca_astro, harmony_astro, umap_astro_harmony

Show code

required_cols <- c(
  "seurat_clusters_harmony",
  "condition",
  "sample",
  "sex",
  "Astro_identity_score1",
  "Reactive_astro_score1",
  "Inflammatory_score1",
  "Stress_score1",
  "ECM_score1",
  "umap_astro_harmony"
)

missing_cols <- setdiff(required_cols, c(colnames(SO_astro@meta.data), Reductions(SO_astro)))

if (length(missing_cols) > 0) {
  stop(
    paste(
      "Missing required Step 11.5 outputs:",
      paste(missing_cols, collapse = ", ")
    )
  )
}

table(SO_astro$seurat_clusters_harmony)


   0    1    2    3    4    5    6 
1602 1584 1256  395  265  102   55

Show code

table(SO_astro$condition)


   C   PD 
2336 2923

Show code

stopifnot("umap_astro_harmony" %in% Reductions(SO_astro))
stopifnot("seurat_clusters_harmony" %in% colnames(SO_astro@meta.data))
stopifnot(!any(is.na(SO_astro$seurat_clusters_harmony)))

2 Step 12.1: Refine Astrocyte State Annotation

Here, the main state definition is cluster-based, not quartile-based.

Based on Step 11.5 cluster summaries, the main groups are:

Reactive-like PD-enriched:
  cluster 0

Homeostatic-like:
  cluster 2

Excluded from main binary DE:
  clusters 1, 3, 4, 5, 6

Clusters 1, 3, 4, 5, and 6 are excluded from the main binary DE comparison because they represent background-stress-like or minor/intermediate states rather than the two extreme states used for candidate prioritization.

The combined reactivity score is calculated only for visualization and summary. It is not used to define the main DE groups.

Show code

SO_astro$astro_cluster <- as.character(SO_astro$seurat_clusters_harmony)

reactive_stress_clusters <- c("0")
homeostatic_clusters <- c("2")
background_stress_clusters <- c("1")
minor_or_intermediate_clusters <- c("3", "4", "5", "6")

SO_astro$astro_state_refined <- case_when(
  SO_astro$astro_cluster %in% reactive_stress_clusters ~ "PD_enriched_reactive_ECM_like",
  SO_astro$astro_cluster %in% homeostatic_clusters ~ "Homeostatic_like",
  SO_astro$astro_cluster %in% background_stress_clusters ~ "Background_stress_like",
  SO_astro$astro_cluster %in% minor_or_intermediate_clusters ~ "Minor_or_intermediate",
  TRUE ~ "Unassigned"
)

SO_astro$astro_state_refined <- factor(
  SO_astro$astro_state_refined,
  levels = c(
    "PD_enriched_reactive_ECM_like",
    "Background_stress_like",
    "Homeostatic_like",
    "Minor_or_intermediate",
    "Unassigned"
  )
)

table(SO_astro$astro_cluster, SO_astro$astro_state_refined)

   
    PD_enriched_reactive_ECM_like Background_stress_like Homeostatic_like
  0                          1602                      0                0
  1                             0                   1584                0
  2                             0                      0             1256
  3                             0                      0                0
  4                             0                      0                0
  5                             0                      0                0
  6                             0                      0                0
   
    Minor_or_intermediate Unassigned
  0                     0          0
  1                     0          0
  2                     0          0
  3                   395          0
  4                   265          0
  5                   102          0
  6                    55          0

Show code

table(SO_astro$astro_state_refined, SO_astro$condition)

                               
                                   C   PD
  PD_enriched_reactive_ECM_like  468 1134
  Background_stress_like         929  655
  Homeostatic_like               627  629
  Minor_or_intermediate          312  505
  Unassigned                       0    0

Show code

reactivity_score_matrix <- SO_astro@meta.data[, c(
  "Reactive_astro_score1",
  "Inflammatory_score1",
  "Stress_score1",
  "ECM_score1"
)]

SO_astro$combined_reactivity_score_z <- rowMeans(
  scale(reactivity_score_matrix),
  na.rm = TRUE
)

summary(SO_astro$combined_reactivity_score_z)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-1.2683 -0.5273 -0.1045  0.0000  0.4100  3.7505

Show code

astro_state_cluster_summary <- SO_astro@meta.data %>%
  mutate(
    astro_cluster = as.character(seurat_clusters_harmony),
    astro_state_refined = as.character(astro_state_refined)
  ) %>%
  group_by(astro_cluster, astro_state_refined) %>%
  summarise(
    n_cells = n(),
    mean_astro_identity = mean(Astro_identity_score1, na.rm = TRUE),
    mean_combined_reactivity_z = mean(combined_reactivity_score_z, na.rm = TRUE),
    mean_reactive = mean(Reactive_astro_score1, na.rm = TRUE),
    mean_inflammatory = mean(Inflammatory_score1, na.rm = TRUE),
    mean_stress = mean(Stress_score1, na.rm = TRUE),
    mean_ecm = mean(ECM_score1, na.rm = TRUE),
    percent_PD = mean(condition == "PD", na.rm = TRUE),
    percent_C = mean(condition == "C", na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_combined_reactivity_z))

astro_state_cluster_summary

Show code

astro_state_group_summary <- SO_astro@meta.data %>%
  mutate(
    astro_cluster = as.character(seurat_clusters_harmony),
    astro_state_refined = as.character(astro_state_refined)
  ) %>%
  group_by(astro_state_refined) %>%
  summarise(
    n_cells = n(),
    mean_astro_identity = mean(Astro_identity_score1, na.rm = TRUE),
    mean_combined_reactivity_z = mean(combined_reactivity_score_z, na.rm = TRUE),
    mean_reactive = mean(Reactive_astro_score1, na.rm = TRUE),
    mean_inflammatory = mean(Inflammatory_score1, na.rm = TRUE),
    mean_stress = mean(Stress_score1, na.rm = TRUE),
    mean_ecm = mean(ECM_score1, na.rm = TRUE),
    percent_PD = mean(condition == "PD", na.rm = TRUE),
    percent_C = mean(condition == "C", na.rm = TRUE),
    clusters_included = paste(sort(unique(astro_cluster)), collapse = ", "),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_combined_reactivity_z))

astro_state_group_summary

Show code

p_state <- DimPlot(
  SO_astro,
  reduction = "umap_astro_harmony",
  group.by = "astro_state_refined",
  label = TRUE,
  repel = TRUE,
  pt.size = 0.5
) +
  labs(title = "Cluster-Based Refined Astrocyte State Annotation")

p_score <- FeaturePlot(
  SO_astro,
  features = "combined_reactivity_score_z",
  reduction = "umap_astro_harmony",
  pt.size = 0.5
) +
  labs(title = "Z-scored Combined Astrocyte Reactivity Score")

p_state | p_score

Show code

SO_astro$astro_DE_group <- case_when(
  SO_astro$astro_state_refined == "PD_enriched_reactive_ECM_like" ~ "Reactive_like_PD_enriched",
  SO_astro$astro_state_refined == "Homeostatic_like" ~ "Homeostatic_like",
  TRUE ~ NA_character_
)

SO_astro$astro_DE_group <- factor(
  SO_astro$astro_DE_group,
  levels = c("Homeostatic_like", "Reactive_like_PD_enriched")
)

table(SO_astro$astro_DE_group, useNA = "ifany")


         Homeostatic_like Reactive_like_PD_enriched                      <NA> 
                     1256                      1602                      2401

Show code

table(SO_astro$astro_DE_group, SO_astro$condition, useNA = "ifany")

                           
                               C   PD
  Homeostatic_like           627  629
  Reactive_like_PD_enriched  468 1134
  <NA>                      1241 1160

Show code

write.csv(
  astro_state_cluster_summary,
  file.path(results_dir, "Step12_1_refined_astrocyte_state_cluster_summary.csv"),
  row.names = FALSE
)

write.csv(
  astro_state_group_summary,
  file.path(results_dir, "Step12_1_refined_astrocyte_state_group_summary.csv"),
  row.names = FALSE
)

write.csv(
  as.data.frame.matrix(table(SO_astro$astro_state_refined, SO_astro$condition)),
  file.path(results_dir, "Step12_1_refined_astrocyte_state_condition_table.csv")
)

write.csv(
  as.data.frame.matrix(table(SO_astro$astro_cluster, SO_astro$astro_state_refined)),
  file.path(results_dir, "Step12_1_cluster_to_refined_state_table.csv")
)

3 Step 12.2: Define Comparison Groups for Differential Expression

We define two main comparisons:

1. Reactive_like_PD_enriched vs Homeostatic_like
2. PD vs C within Fernando-defined astrocytes

The first comparison prioritizes cluster-defined astrocyte state-associated genes.
The second comparison prioritizes disease-associated genes across all Fernando-defined astrocytes.

Show code

SO_astro_main_DE <- subset(
  SO_astro,
  subset = !is.na(astro_DE_group)
)

SO_astro_main_DE$astro_DE_group <- droplevels(SO_astro_main_DE$astro_DE_group)

table(SO_astro_main_DE$astro_DE_group)


         Homeostatic_like Reactive_like_PD_enriched 
                     1256                      1602

Show code

table(SO_astro_main_DE$astro_DE_group, SO_astro_main_DE$condition)

                           
                               C   PD
  Homeostatic_like           627  629
  Reactive_like_PD_enriched  468 1134

Show code

min_pct_use <- 0.05
logfc_threshold_use <- 0.10
p_adj_cutoff <- 0.05
lncRNA_logfc_cutoff <- 0.25

min_pct_use

[1] 0.05

Show code

logfc_threshold_use

[1] 0.1

Show code

p_adj_cutoff

[1] 0.05

Show code

lncRNA_logfc_cutoff

[1] 0.25

4 Step 12.3: Differential Expression Analysis

We run DE for:

Reactive_like_PD_enriched vs Homeostatic_like
PD vs C

Show code

Idents(SO_astro_main_DE) <- SO_astro_main_DE$astro_DE_group

astro_reactive_vs_homeostatic_DE <- FindMarkers(
  SO_astro_main_DE,
  ident.1 = "Reactive_like_PD_enriched",
  ident.2 = "Homeostatic_like",
  min.pct = min_pct_use,
  logfc.threshold = logfc_threshold_use,
  test.use = "wilcox"
) %>%
  rownames_to_column("gene") %>%
  arrange(p_val_adj, desc(avg_log2FC))

astro_reactive_vs_homeostatic_DE %>%
  slice_head(n = 20)

Show code

Idents(SO_astro) <- SO_astro$condition

astro_PD_vs_C_DE <- FindMarkers(
  SO_astro,
  ident.1 = "PD",
  ident.2 = "C",
  min.pct = min_pct_use,
  logfc.threshold = logfc_threshold_use,
  test.use = "wilcox"
) %>%
  rownames_to_column("gene") %>%
  arrange(p_val_adj, desc(avg_log2FC))

astro_PD_vs_C_DE %>% head(20)

5 Step 12.4: Cluster-Specific DE Analysis

Here we extract markers for each astrocyte subcluster.

These are subcluster/state markers, not broad astrocyte identity markers.

Show code

Idents(SO_astro) <- SO_astro$seurat_clusters_harmony

astro_cluster_markers <- FindAllMarkers(
  SO_astro,
  only.pos = TRUE,
  min.pct = min_pct_use,
  logfc.threshold = logfc_threshold_use,
  test.use = "wilcox"
) %>%
  arrange(cluster, p_val_adj, desc(avg_log2FC))

astro_cluster_markers %>% head(30)

Show code

top_astro_cluster_markers <- astro_cluster_markers %>%
  group_by(cluster) %>%
  slice_min(order_by = p_val_adj, n = 20, with_ties = FALSE) %>%
  ungroup()

top_astro_cluster_markers

6 Step 12.5 / 12.6

Show code

lncRNA_annotation_path <- file.path(
  project_dir,
  "results",
  "Step11_5_fernando_astrocyte_harmony_reclustering",
  "Step11_5_astrocyte_lncRNA_gene_annotation_context_based.csv"
)

if (!file.exists(lncRNA_annotation_path)) {
  stop(
    paste(
      "Step 11.5 context-based lncRNA annotation file not found:",
      lncRNA_annotation_path
    )
  )
}

lncRNA_gene_annotation <- read.csv(
  lncRNA_annotation_path,
  stringsAsFactors = FALSE
) %>%
  as_tibble()

required_lncRNA_anno_cols <- c(
  "gene",
  "gencode_gene_id",
  "gencode_gene_name",
  "lncRNA_type",
  "is_lncRNA"
)

missing_lncRNA_anno_cols <- setdiff(
  required_lncRNA_anno_cols,
  colnames(lncRNA_gene_annotation)
)

if (length(missing_lncRNA_anno_cols) > 0) {
  stop(
    paste(
      "Missing columns in Step 11.5 lncRNA annotation:",
      paste(missing_lncRNA_anno_cols, collapse = ", ")
    )
  )
}

lncRNA_gene_annotation <- lncRNA_gene_annotation %>%
  dplyr::select(
    gene,
    gencode_gene_id,
    gencode_gene_name,
    lncRNA_type,
    is_lncRNA
  ) %>%
  distinct(gene, .keep_all = TRUE)

lncRNA_gene_annotation %>%
  count(lncRNA_type, name = "n_genes") %>%
  arrange(desc(n_genes))

Show code

annotate_de_table_with_lncRNA_context <- function(de_table) {
  de_table %>%
    left_join(
      lncRNA_gene_annotation,
      by = "gene"
    ) %>%
    mutate(
      is_lncRNA = if_else(is.na(is_lncRNA), FALSE, is_lncRNA),
      lncRNA_type = if_else(
        is.na(lncRNA_type),
        "Not_lncRNA_or_unmatched",
        lncRNA_type
      ),
      gene_symbol_for_plot = case_when(
        !is.na(gencode_gene_name) ~ gencode_gene_name,
        TRUE ~ gene
      )
    )
}

astro_reactive_vs_homeostatic_annotated <- annotate_de_table_with_lncRNA_context(
  astro_reactive_vs_homeostatic_DE
)

astro_PD_vs_C_annotated <- annotate_de_table_with_lncRNA_context(
  astro_PD_vs_C_DE
)

astro_cluster_markers_annotated <- annotate_de_table_with_lncRNA_context(
  astro_cluster_markers
)

7 Step 12.7: Extract Annotated lncRNA Candidates

We extract lncRNA-like candidates from each comparison.

Show code

reactive_lncRNA_candidates <- astro_reactive_vs_homeostatic_annotated %>%
  filter(
    is_lncRNA,
    p_val_adj < p_adj_cutoff,
    abs(avg_log2FC) >= lncRNA_logfc_cutoff
  ) %>%
  arrange(p_val_adj, desc(abs(avg_log2FC)))

PD_lncRNA_candidates <- astro_PD_vs_C_annotated %>%
  filter(
    is_lncRNA,
    p_val_adj < p_adj_cutoff,
    abs(avg_log2FC) >= lncRNA_logfc_cutoff
  ) %>%
  arrange(p_val_adj, desc(abs(avg_log2FC)))

cluster_lncRNA_candidates <- astro_cluster_markers_annotated %>%
  filter(
    is_lncRNA,
    p_val_adj < p_adj_cutoff,
    abs(avg_log2FC) >= lncRNA_logfc_cutoff
  ) %>%
  arrange(cluster, p_val_adj, desc(avg_log2FC))

reactive_lncRNA_candidates %>%
  dplyr::select(gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  slice_head(n = 30)

Show code

PD_lncRNA_candidates %>%
  dplyr::select(gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  slice_head(n = 30)

Show code

cluster_lncRNA_candidates %>%
  dplyr::select(cluster, gene, gene_symbol_for_plot, lncRNA_type, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  slice_head(n = 30)

8 Step 12.8: Prioritize lncRNAs Shared Across Comparisons

The strongest candidates are lncRNAs associated with both the reactive/stress-like astrocyte state and PD status.

Show code

priority_astro_lncRNAs <- reactive_lncRNA_candidates %>%
  dplyr::select(
    gene,
    gene_symbol_for_plot,
    gencode_gene_id,
    gencode_gene_name,
    lncRNA_type,
    avg_log2FC_reactive_vs_homeostatic = avg_log2FC,
    p_val_adj_reactive_vs_homeostatic = p_val_adj,
    pct.1_reactive = pct.1,
    pct.2_homeostatic = pct.2
  ) %>%
  inner_join(
    PD_lncRNA_candidates %>%
      dplyr::select(
        gene,
        avg_log2FC_PD_vs_C = avg_log2FC,
        p_val_adj_PD_vs_C = p_val_adj,
        pct.1_PD = pct.1,
        pct.2_C = pct.2
      ),
    by = "gene"
  ) %>%
  mutate(
    same_direction = sign(avg_log2FC_reactive_vs_homeostatic) == sign(avg_log2FC_PD_vs_C),
    priority_score =
      -log10(p_val_adj_reactive_vs_homeostatic + 1e-300) +
      -log10(p_val_adj_PD_vs_C + 1e-300) +
      abs(avg_log2FC_reactive_vs_homeostatic) +
      abs(avg_log2FC_PD_vs_C)
  ) %>%
  arrange(desc(same_direction), desc(priority_score))

priority_astro_lncRNAs

Show code

cluster_lncRNA_support <- cluster_lncRNA_candidates %>%
  group_by(gene) %>%
  summarise(
    cluster_support = paste(sort(unique(cluster)), collapse = ";"),
    max_cluster_log2FC = max(avg_log2FC, na.rm = TRUE),
    min_cluster_p_val_adj = min(p_val_adj, na.rm = TRUE),
    .groups = "drop"
  )

priority_astro_lncRNAs <- priority_astro_lncRNAs %>%
  left_join(cluster_lncRNA_support, by = "gene") %>%
  arrange(desc(same_direction), desc(priority_score))

priority_astro_lncRNAs %>%
  dplyr::select(
    gene,
    gene_symbol_for_plot,
    avg_log2FC_reactive_vs_homeostatic,
    avg_log2FC_PD_vs_C,
    p_val_adj_reactive_vs_homeostatic,
    p_val_adj_PD_vs_C,
    same_direction,
    priority_score,
    cluster_support
  ) %>%
  slice_head(n = 30)

Show code

driver_lncRNAs <- priority_astro_lncRNAs %>%
  filter(
    same_direction,
    avg_log2FC_reactive_vs_homeostatic > 0,
    avg_log2FC_PD_vs_C > 0
  )

defender_lncRNAs <- priority_astro_lncRNAs %>%
  filter(
    same_direction,
    avg_log2FC_reactive_vs_homeostatic < 0,
    avg_log2FC_PD_vs_C < 0
  )

driver_lncRNAs

Show code

defender_lncRNAs

9 Step 12.9: Visualize Top lncRNA Candidates

We visualize the top shared candidates when they exist.

If no shared candidates are found, we visualize the top candidates from the reactive/stress-like state comparison.

Show code

top_lncRNAs_for_plot <- priority_astro_lncRNAs %>%
  slice_head(n = 12) %>%
  pull(gene)

if (length(top_lncRNAs_for_plot) == 0) {
  top_lncRNAs_for_plot <- reactive_lncRNA_candidates %>%
    slice_head(n = 12) %>%
    pull(gene)
}

top_lncRNAs_for_plot <- intersect(top_lncRNAs_for_plot, rownames(SO_astro))

top_lncRNAs_for_plot

 [1] "MALAT1"          "ENSG00000299261" "OBI1-AS1"        "ENSG00000233942"
 [5] "TALAM1"          "LINC00499"       "ENSG00000228408" "MIR9-1HG"       
 [9] "ENSG00000287544" "PCDH9-AS2"       "POT1-AS1"        "SLC44A3-AS1"

Show code

if (length(top_lncRNAs_for_plot) > 0) {
  FeaturePlot(
    SO_astro,
    features = top_lncRNAs_for_plot,
    reduction = "umap_astro_harmony",
    ncol = 4,
    pt.size = 0.5
  )
} else {
  message("No lncRNA candidates available for FeaturePlot.")
}

Show code

if (length(top_lncRNAs_for_plot) > 0) {
  DotPlot(
    SO_astro,
    features = top_lncRNAs_for_plot,
    group.by = "astro_state_refined"
  ) +
    RotatedAxis() +
    labs(title = "Top lncRNA Candidates by Refined Astrocyte State")
} else {
  message("No lncRNA candidates available for DotPlot.")
}

Show code

if (length(top_lncRNAs_for_plot) > 0) {
  DotPlot(
    SO_astro,
    features = top_lncRNAs_for_plot,
    group.by = "condition"
  ) +
    RotatedAxis() +
    labs(title = "Top lncRNA Candidates by Condition")
} else {
  message("No lncRNA candidates available for DotPlot.")
}

10 Step 12.10: Save Astrocyte DE and lncRNA Discovery Results

Show code

saveRDS(
  SO_astro,
  file.path(results_dir, "SO_astro_step12_5_fernando_lncRNA_analysis.rds")
)

saveRDS(
  SO_astro_main_DE,
  file.path(results_dir, "SO_astro_step12_5_main_DE_subset.rds")
)

write.csv(
  astro_state_cluster_summary,
  file.path(results_dir, "Step12_5_astro_state_cluster_summary.csv"),
  row.names = FALSE
)

write.csv(
  astro_state_group_summary,
  file.path(results_dir, "Step12_5_astro_state_group_summary.csv"),
  row.names = FALSE
)

write.csv(
  astro_reactive_vs_homeostatic_annotated,
  file.path(results_dir, "Step12_5_DE_reactive_like_PD_enriched_vs_homeostatic_like.csv"),
  row.names = FALSE
)

write.csv(
  astro_PD_vs_C_annotated,
  file.path(results_dir, "Step12_5_DE_PD_vs_C_astrocytes.csv"),
  row.names = FALSE
)

write.csv(
  astro_cluster_markers_annotated,
  file.path(results_dir, "Step12_5_astro_cluster_markers_annotated.csv"),
  row.names = FALSE
)

write.csv(
  reactive_lncRNA_candidates,
  file.path(results_dir, "Step12_5_reactive_like_PD_enriched_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  PD_lncRNA_candidates,
  file.path(results_dir, "Step12_5_PD_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  cluster_lncRNA_candidates,
  file.path(results_dir, "Step12_5_cluster_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  priority_astro_lncRNAs,
  file.path(results_dir, "Step12_5_priority_astro_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  driver_lncRNAs,
  file.path(results_dir, "Step12_5_driver_like_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  defender_lncRNAs,
  file.path(results_dir, "Step12_5_defender_like_lncRNA_candidates.csv"),
  row.names = FALSE
)

write.csv(
  data.frame(
    setting = c(
      "input_object",
      "state_definition",
      "reactive_like_PD_enriched_clusters",
      "homeostatic_like_clusters",
      "excluded_from_main_DE_clusters",
      "combined_reactivity_score_usage",
      "min_pct_use",
      "logfc_threshold_use",
      "p_adj_cutoff",
      "lncRNA_logfc_cutoff",
      "n_main_DE_cells",
      "n_reactive_lncRNA_candidates",
      "n_PD_lncRNA_candidates",
      "n_cluster_lncRNA_candidates",
      "n_priority_lncRNAs",
      "n_driver_like_lncRNAs",
      "n_defender_like_lncRNAs"
    ),
    value = c(
      input_path,
      "cluster-based refined astrocyte states",
      paste(reactive_stress_clusters, collapse = ","),
      paste(homeostatic_clusters, collapse = ","),
      paste(c(background_stress_clusters, minor_or_intermediate_clusters), collapse = ","),
      "z-scored summary/visualization only; not used for main DE grouping",
      min_pct_use,
      logfc_threshold_use,
      p_adj_cutoff,
      lncRNA_logfc_cutoff,
      ncol(SO_astro_main_DE),
      nrow(reactive_lncRNA_candidates),
      nrow(PD_lncRNA_candidates),
      nrow(cluster_lncRNA_candidates),
      nrow(priority_astro_lncRNAs),
      nrow(driver_lncRNAs),
      nrow(defender_lncRNAs)
    )
  ),
  file.path(results_dir, "Step12_5_analysis_settings.csv"),
  row.names = FALSE
)

11 Interpretation Note

This analysis prioritizes lncRNAs that are associated with astrocyte state and/or PD status.

The strongest candidates are not simply the most significant DE genes. They are candidates that satisfy several criteria:

1. Context-based lncRNA annotation from Step 11.5 using GENCODE v49
2. Differential expression in cluster-defined reactive-like PD-enriched astrocytes
3. Differential expression in PD astrocytes
4. Optional support from astrocyte subcluster markers
5. Interpretable direction of change

The final candidate list should be treated as a hypothesis-generating result. Any top lncRNA should be checked manually using genome annotation databases and literature before being described as novel or biologically meaningful.