library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(RColorBrewer)
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0.9000     ✔ readr     2.1.5     
## ✔ ggplot2   3.5.2          ✔ stringr   1.5.1     
## ✔ lubridate 1.9.4          ✔ tibble    3.2.1     
## ✔ purrr     1.0.4          ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(EnhancedVolcano)
## Loading required package: ggrepel
## Warning: package 'ggrepel' was built under R version 4.3.3
library(readr)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(clusterProfiler)
## Warning: package 'clusterProfiler' was built under R version 4.3.1
## 
## Registered S3 methods overwritten by 'treeio':
##   method              from    
##   MRCA.phylo          tidytree
##   MRCA.treedata       tidytree
##   Nnode.treedata      tidytree
##   Ntip.treedata       tidytree
##   ancestor.phylo      tidytree
##   ancestor.treedata   tidytree
##   child.phylo         tidytree
##   child.treedata      tidytree
##   full_join.phylo     tidytree
##   full_join.treedata  tidytree
##   groupClade.phylo    tidytree
##   groupClade.treedata tidytree
##   groupOTU.phylo      tidytree
##   groupOTU.treedata   tidytree
##   inner_join.phylo    tidytree
##   inner_join.treedata tidytree
##   is.rooted.treedata  tidytree
##   nodeid.phylo        tidytree
##   nodeid.treedata     tidytree
##   nodelab.phylo       tidytree
##   nodelab.treedata    tidytree
##   offspring.phylo     tidytree
##   offspring.treedata  tidytree
##   parent.phylo        tidytree
##   parent.treedata     tidytree
##   root.treedata       tidytree
##   rootnode.phylo      tidytree
##   sibling.phylo       tidytree
## clusterProfiler v4.8.3  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
## 
## Attaching package: 'clusterProfiler'
## 
## The following object is masked from 'package:purrr':
## 
##     simplify
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(enrichR)
## Warning: package 'enrichR' was built under R version 4.3.3
## Welcome to enrichR
## Checking connections ... 
## Enrichr ... Connection is Live!
## FlyEnrichr ... Connection is Live!
## WormEnrichr ... Connection is Live!
## YeastEnrichr ... Connection is Live!
## FishEnrichr ... Connection is Live!
## OxEnrichr ... Connection is Live!
library(org.Mm.eg.db)
## Loading required package: AnnotationDbi
## Warning: package 'AnnotationDbi' was built under R version 4.3.2
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.3.1
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## 
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.3.1
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.3.1
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.3.2
## 
## Attaching package: 'S4Vectors'
## 
## The following object is masked from 'package:clusterProfiler':
## 
##     rename
## 
## The following objects are masked from 'package:lubridate':
## 
##     second, second<-
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## 
## The following object is masked from 'package:utils':
## 
##     findMatches
## 
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## 
## Attaching package: 'IRanges'
## 
## The following object is masked from 'package:clusterProfiler':
## 
##     slice
## 
## The following object is masked from 'package:lubridate':
## 
##     %within%
## 
## The following object is masked from 'package:purrr':
## 
##     reduce
## 
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## 
## Attaching package: 'AnnotationDbi'
## 
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## GO for Microglia Shk VS. PBS
library(readr)
Microglia_Shk_vs_PBS <- read_csv("~/Desktop/PAP1.Celltype.DEG/shk/Microglia_Shk_vs_PBS.csv")
## New names:
## Rows: 581 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): name, diffexpressed, Expression.Padjust dbl (7): ...1, pval, adj_pval,
## f_statistic, df1, df2, lfc
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
table(Microglia_Shk_vs_PBS$Expression.Padjust)
## 
## DOWN   NO   UP 
##   16  530   35
resDF <- Microglia_Shk_vs_PBS
 
# Count groups
up_count <- sum(resDF$Expression.Padjust == "UP", na.rm = TRUE)
down_count <- sum(resDF$Expression.Padjust == "DOWN", na.rm = TRUE)
no_count <- sum(resDF$Expression.Padjust == "NO", na.rm = TRUE)

# Create legend label
legend_title <- paste0("Expression\nUP: ", up_count,
                       " | DOWN: ", down_count,
                       " | NS: ", no_count)

# Top 15 UP and DOWN
top_up <- resDF %>% filter(Expression.Padjust == "UP") %>% arrange(adj_pval) %>% head(10)
top_down <- resDF %>% filter(Expression.Padjust == "DOWN") %>% arrange(adj_pval) %>% head(10)
top_genes <- bind_rows(top_up, top_down)

#####
 Microglia_Shk_vs_PBS.UP <- subset( Microglia_Shk_vs_PBS, Expression.Padjust == "UP")
 Microglia_Shk_vs_PBS.UP <-  Microglia_Shk_vs_PBS.UP$name

Microglia_Shk_vs_PBS.DOWN <- subset(Microglia_Shk_vs_PBS, Expression.Padjust == "DOWN")
Microglia_Shk_vs_PBS.DOWN <- Microglia_Shk_vs_PBS.DOWN$name

## GO Enrichment for Microglia UP (PAP1 vs. CONTROL)
go_up <- enrichGO(
  gene =  Microglia_Shk_vs_PBS.UP,
  OrgDb = org.Mm.eg.db,
  keyType = "SYMBOL",       # <- Correct keyType
  ont = "ALL",              # Ontologies: BP, MF, CC
  pAdjustMethod = "BH",
  qvalueCutoff = 1,         # No filtering yet
  readable = FALSE
)

go_down <- enrichGO(
  gene = Microglia_Shk_vs_PBS.DOWN,
  OrgDb = org.Mm.eg.db,
  keyType = "SYMBOL",
  ont = "ALL",
  pAdjustMethod = "BH",
  qvalueCutoff = 1,
  readable = FALSE
)

# Extract and label results — use -log10(p.adjust) instead of NES
Microglia_Shk_vs_PBS.GO_UP.result <- go_up@result
Microglia_Shk_vs_PBS.GO_UP.result$logFDR <- -log10(Microglia_Shk_vs_PBS.GO_UP.result$p.adjust)
Microglia_Shk_vs_PBS.GO_UP.result$Expression <- "UP"
Microglia_Shk_vs_PBS.GO_UP.result$Type <- "Shk.vs.PBS"

Microglia_Shk_vs_PBS.GO_Down.result <- go_down@result
Microglia_Shk_vs_PBS.GO_Down.result$logFDR <- -log10(Microglia_Shk_vs_PBS.GO_Down.result$p.adjust)
Microglia_Shk_vs_PBS.GO_Down.result$Expression <- "DOWN"
Microglia_Shk_vs_PBS.GO_Down.result$Type <- "Shk.vs.PBS"

saveRDS(Microglia_Shk_vs_PBS.GO_UP.result, file = 'Microglia_Shk_vs_PBS.GO_UP.result.rds')
write.csv(Microglia_Shk_vs_PBS.GO_UP.result, file = 'Microglia_Shk_vs_PBS.GO_UP.result.csv')
saveRDS(Microglia_Shk_vs_PBS.GO_Down.result, file = 'Microglia_Shk_vs_PBS.GO_Down.result.rds')
write.csv(Microglia_Shk_vs_PBS.GO_Down.result, file = 'Microglia_Shk_vs_PBS.GO_Down.result.csv')
## GO plot
library(ggplot2)
library(dplyr)
library(stringr)

# Combine Up and Down GO terms
# Combine Up and Down GO terms
go_combined <- rbind(
    Microglia_Shk_vs_PBS.GO_UP.result %>% mutate(Regulation = "Upregulated"),
    Microglia_Shk_vs_PBS.GO_Down.result %>% mutate(Regulation = "Downregulated")
)


# Calculate signed -log10(p.adjust)
go_combined <- go_combined %>%
    mutate(
        log10FDR = -log10(p.adjust),
        signed.log10FDR = ifelse(Regulation == "Downregulated", -log10FDR, log10FDR)
    )

# Optional: Add gene symbols (up to 3 per term)
go_combined <- go_combined %>%
    mutate(
        GeneSymbols = sapply(strsplit(geneID, "/"), function(g) {
            unique_genes <- unique(g)
            paste(head(unique_genes, 3), collapse = ", ")
        })
    )

# Select top 5 from each direction
top_go_plot_data <- go_combined %>%
    group_by(Regulation) %>%
    slice_max(order_by = abs(signed.log10FDR), n = 6) %>%
    ungroup()

# Order GO terms to show Up on top, Down below
top_go_plot_data <- top_go_plot_data %>%
    arrange(desc(Regulation), desc(signed.log10FDR))

top_go_plot_data$Description <- factor(
    top_go_plot_data$Description,
    levels = rev(top_go_plot_data$Description)
)

# Plot
plot_go <- ggplot(top_go_plot_data, aes(x = signed.log10FDR, y = Description, fill = Regulation)) +
    geom_bar(stat = "identity", width = 0.7) +
    scale_fill_manual(values = c("Upregulated" = "red", "Downregulated" = "blue")) +
    geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
    theme_minimal(base_size = 14) +
    theme(
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        panel.grid = element_blank(),
        axis.text.y = element_text(size = 14),
        axis.title = element_text(face = "bold"),
        plot.title = element_text(face = "bold", hjust = 0.5),
        legend.position = "right"
    ) +
    labs(
        title = "GO:Microglia Shk vs.PBS",
        x = expression(-log[10](AdjP~value)),
        y = "GO Term"
    ) +
    xlim(-10, 10)  # fixed symmetric axis

# Show plot
plot_go

# Save as PDF
ggsave("GO.Microglia Shk vs.PBS.pdf", plot = plot_go, width = 10, height = 6)
write.csv(top_go_plot_data, file = 'Top.GO.Microglia.Shk.vs.PBS.csv')
#go_combined
write.csv(go_combined, file = 'GO_combined.Microglia.Shk.vs.PBS.csv')
library(readr)
Oligodendrocyte_Pap1_vs_Control <- read_csv("~/Desktop/PAP1.Celltype.DEG/Oligodendrocyte_Pap1_vs_Control.csv")
## New names:
## Rows: 2494 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): name, diffexpressed, Expression.Padjust dbl (7): ...1, pval, adj_pval,
## f_statistic, df1, df2, lfc
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
table(Oligodendrocyte_Pap1_vs_Control$Expression.Padjust)
## 
## DOWN   NO   UP 
##  343 1797  354
Oligodendrocyte_Pap1_vs_Control.UP <- subset( Oligodendrocyte_Pap1_vs_Control, Expression.Padjust == "UP")
Oligodendrocyte_Pap1_vs_Control.UP <-  Oligodendrocyte_Pap1_vs_Control.UP$name

Oligodendrocyte_Pap1_vs_Control.DOWN <- subset(Oligodendrocyte_Pap1_vs_Control, Expression.Padjust == "DOWN")
Oligodendrocyte_Pap1_vs_Control.DOWN <- Oligodendrocyte_Pap1_vs_Control.DOWN$name

## GO Enrichment for Microglia UP (PAP1 vs. CONTROL)
go_up <- enrichGO(
  gene = Oligodendrocyte_Pap1_vs_Control.UP,
  OrgDb = org.Mm.eg.db,
  keyType = "SYMBOL",       # <- Correct keyType
  ont = "ALL",              # Ontologies: BP, MF, CC
  pAdjustMethod = "BH",
  qvalueCutoff = 1,         # No filtering yet
  readable = FALSE
)

go_down <- enrichGO(
  gene = Oligodendrocyte_Pap1_vs_Control.DOWN,
  OrgDb = org.Mm.eg.db,
  keyType = "SYMBOL",
  ont = "ALL",
  pAdjustMethod = "BH",
  qvalueCutoff = 1,
  readable = FALSE
)

# Extract and label results — use -log10(p.adjust) instead of NES
Oligodendrocyte_Pap1_vs_Control.GO_UP.result <- go_up@result
Oligodendrocyte_Pap1_vs_Control.GO_UP.result$logFDR <- -log10(Oligodendrocyte_Pap1_vs_Control.GO_UP.result$p.adjust)
Oligodendrocyte_Pap1_vs_Control.GO_UP.result$Expression <- "UP"
Oligodendrocyte_Pap1_vs_Control.GO_UP.result$Type <- "Shk.vs.PBS"

Oligodendrocyte_Pap1_vs_Control.GO_Down.result <- go_down@result
Oligodendrocyte_Pap1_vs_Control.GO_Down.result$logFDR <- -log10(Oligodendrocyte_Pap1_vs_Control.GO_Down.result$p.adjust)
Oligodendrocyte_Pap1_vs_Control.GO_Down.result$Expression <- "DOWN"
Oligodendrocyte_Pap1_vs_Control.GO_Down.result$Type <- "Shk.vs.PBS"

saveRDS(Oligodendrocyte_Pap1_vs_Control.GO_UP.result, file = 'Oligodendrocyte_Pap1_vs_Control.GO_UP.result.rds')
write.csv(Oligodendrocyte_Pap1_vs_Control.GO_UP.result, file = 'Oligodendrocyte_Pap1_vs_Control.GO_UP.result.csv')
saveRDS(Oligodendrocyte_Pap1_vs_Control.GO_Down.result, file = 'Oligodendrocyte_Pap1_vs_Control.GO_Down.result.rds')
write.csv(Oligodendrocyte_Pap1_vs_Control.GO_Down.result, file = 'Oligodendrocyte_Pap1_vs_Control.GO_Down.result.csv')
## GO plot
library(ggplot2)
library(dplyr)
library(stringr)

# Combine Up and Down GO terms
go_combined <- rbind(
    Oligodendrocyte_Pap1_vs_Control.GO_UP.result %>% mutate(Regulation = "Upregulated"),
   Oligodendrocyte_Pap1_vs_Control.GO_Down.result %>% mutate(Regulation = "Downregulated")
)

# Calculate signed -log10(p.adjust)
go_combined <- go_combined %>%
    mutate(
        log10FDR = -log10(p.adjust),
        signed.log10FDR = ifelse(Regulation == "Downregulated", -log10FDR, log10FDR)
    )

# Optional: Add gene symbols (up to 3 per term)
go_combined <- go_combined %>%
    mutate(
        GeneSymbols = sapply(strsplit(geneID, "/"), function(g) {
            unique_genes <- unique(g)
            paste(head(unique_genes, 3), collapse = ", ")
        })
    )

# Select top 5 from each direction
top_go_plot_data <- go_combined %>%
    group_by(Regulation) %>%
    slice_max(order_by = abs(signed.log10FDR), n = 6) %>%
    ungroup()

# Order GO terms to show Up on top, Down below
top_go_plot_data <- top_go_plot_data %>%
    arrange(desc(Regulation), desc(signed.log10FDR))

#top_go_plot_data$Description <- factor(
   # top_go_plot_data$Description,
    #levels = rev(top_go_plot_data$Description))

# Plot
plot_go <- ggplot(top_go_plot_data, aes(x = signed.log10FDR, y = Description, fill = Regulation)) +
    geom_bar(stat = "identity", width = 0.7) +
    scale_fill_manual(values = c("Upregulated" = "red", "Downregulated" = "blue")) +
    geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
    theme_minimal(base_size = 14) +
    theme(
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        panel.grid = element_blank(),
        axis.text.y = element_text(size = 14),
        axis.title = element_text(face = "bold"),
        plot.title = element_text(face = "bold", hjust = 0.5),
        legend.position = "right"
    ) +
    labs(
        title = "GO:Oligodendroctes PAP1 vs.Control",
        x = expression(-log[10](AdjP~value)),
        y = "GO Term"
    ) +
    xlim(-20, 20)  # fixed symmetric axis

# Show plot
plot_go

# Save as PDF
ggsave("GO.Oligodendroctes.PAP1.vs.Control.pdf", plot = plot_go, width = 10, height = 6)
write.csv(top_go_plot_data, file = 'Top.GO.Oligodendroctes.PAP1.vs.Control.csv')
#go_combined
write.csv(go_combined, file = 'GO_combined.Oligodendroctes.PAP1.vs.Control.csv')