library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RColorBrewer)
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0.9000 ✔ readr 2.1.5
## ✔ ggplot2 3.5.2 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(EnhancedVolcano)
## Loading required package: ggrepel
## Warning: package 'ggrepel' was built under R version 4.3.3
library(readr)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(clusterProfiler)
## Warning: package 'clusterProfiler' was built under R version 4.3.1
##
## Registered S3 methods overwritten by 'treeio':
## method from
## MRCA.phylo tidytree
## MRCA.treedata tidytree
## Nnode.treedata tidytree
## Ntip.treedata tidytree
## ancestor.phylo tidytree
## ancestor.treedata tidytree
## child.phylo tidytree
## child.treedata tidytree
## full_join.phylo tidytree
## full_join.treedata tidytree
## groupClade.phylo tidytree
## groupClade.treedata tidytree
## groupOTU.phylo tidytree
## groupOTU.treedata tidytree
## inner_join.phylo tidytree
## inner_join.treedata tidytree
## is.rooted.treedata tidytree
## nodeid.phylo tidytree
## nodeid.treedata tidytree
## nodelab.phylo tidytree
## nodelab.treedata tidytree
## offspring.phylo tidytree
## offspring.treedata tidytree
## parent.phylo tidytree
## parent.treedata tidytree
## root.treedata tidytree
## rootnode.phylo tidytree
## sibling.phylo tidytree
## clusterProfiler v4.8.3 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
##
## The following object is masked from 'package:purrr':
##
## simplify
##
## The following object is masked from 'package:stats':
##
## filter
library(enrichR)
## Warning: package 'enrichR' was built under R version 4.3.3
## Welcome to enrichR
## Checking connections ...
## Enrichr ... Connection is Live!
## FlyEnrichr ... Connection is Live!
## WormEnrichr ... Connection is Live!
## YeastEnrichr ... Connection is Live!
## FishEnrichr ... Connection is Live!
## OxEnrichr ... Connection is Live!
library(org.Mm.eg.db)
## Loading required package: AnnotationDbi
## Warning: package 'AnnotationDbi' was built under R version 4.3.2
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.3.1
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
##
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
##
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
##
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.3.1
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.3.1
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.3.2
##
## Attaching package: 'S4Vectors'
##
## The following object is masked from 'package:clusterProfiler':
##
## rename
##
## The following objects are masked from 'package:lubridate':
##
## second, second<-
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## The following objects are masked from 'package:dplyr':
##
## first, rename
##
## The following object is masked from 'package:utils':
##
## findMatches
##
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
##
## Attaching package: 'IRanges'
##
## The following object is masked from 'package:clusterProfiler':
##
## slice
##
## The following object is masked from 'package:lubridate':
##
## %within%
##
## The following object is masked from 'package:purrr':
##
## reduce
##
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
##
## Attaching package: 'AnnotationDbi'
##
## The following object is masked from 'package:clusterProfiler':
##
## select
##
## The following object is masked from 'package:dplyr':
##
## select
## GO for Microglia Shk VS. PBS
library(readr)
Microglia_Shk_vs_PBS <- read_csv("~/Desktop/PAP1.Celltype.DEG/shk/Microglia_Shk_vs_PBS.csv")
## New names:
## Rows: 581 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): name, diffexpressed, Expression.Padjust dbl (7): ...1, pval, adj_pval,
## f_statistic, df1, df2, lfc
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
table(Microglia_Shk_vs_PBS$Expression.Padjust)
##
## DOWN NO UP
## 16 530 35
resDF <- Microglia_Shk_vs_PBS
# Count groups
up_count <- sum(resDF$Expression.Padjust == "UP", na.rm = TRUE)
down_count <- sum(resDF$Expression.Padjust == "DOWN", na.rm = TRUE)
no_count <- sum(resDF$Expression.Padjust == "NO", na.rm = TRUE)
# Create legend label
legend_title <- paste0("Expression\nUP: ", up_count,
" | DOWN: ", down_count,
" | NS: ", no_count)
# Top 15 UP and DOWN
top_up <- resDF %>% filter(Expression.Padjust == "UP") %>% arrange(adj_pval) %>% head(10)
top_down <- resDF %>% filter(Expression.Padjust == "DOWN") %>% arrange(adj_pval) %>% head(10)
top_genes <- bind_rows(top_up, top_down)
#####
Microglia_Shk_vs_PBS.UP <- subset( Microglia_Shk_vs_PBS, Expression.Padjust == "UP")
Microglia_Shk_vs_PBS.UP <- Microglia_Shk_vs_PBS.UP$name
Microglia_Shk_vs_PBS.DOWN <- subset(Microglia_Shk_vs_PBS, Expression.Padjust == "DOWN")
Microglia_Shk_vs_PBS.DOWN <- Microglia_Shk_vs_PBS.DOWN$name
## GO Enrichment for Microglia UP (PAP1 vs. CONTROL)
go_up <- enrichGO(
gene = Microglia_Shk_vs_PBS.UP,
OrgDb = org.Mm.eg.db,
keyType = "SYMBOL", # <- Correct keyType
ont = "ALL", # Ontologies: BP, MF, CC
pAdjustMethod = "BH",
qvalueCutoff = 1, # No filtering yet
readable = FALSE
)
go_down <- enrichGO(
gene = Microglia_Shk_vs_PBS.DOWN,
OrgDb = org.Mm.eg.db,
keyType = "SYMBOL",
ont = "ALL",
pAdjustMethod = "BH",
qvalueCutoff = 1,
readable = FALSE
)
# Extract and label results — use -log10(p.adjust) instead of NES
Microglia_Shk_vs_PBS.GO_UP.result <- go_up@result
Microglia_Shk_vs_PBS.GO_UP.result$logFDR <- -log10(Microglia_Shk_vs_PBS.GO_UP.result$p.adjust)
Microglia_Shk_vs_PBS.GO_UP.result$Expression <- "UP"
Microglia_Shk_vs_PBS.GO_UP.result$Type <- "Shk.vs.PBS"
Microglia_Shk_vs_PBS.GO_Down.result <- go_down@result
Microglia_Shk_vs_PBS.GO_Down.result$logFDR <- -log10(Microglia_Shk_vs_PBS.GO_Down.result$p.adjust)
Microglia_Shk_vs_PBS.GO_Down.result$Expression <- "DOWN"
Microglia_Shk_vs_PBS.GO_Down.result$Type <- "Shk.vs.PBS"
saveRDS(Microglia_Shk_vs_PBS.GO_UP.result, file = 'Microglia_Shk_vs_PBS.GO_UP.result.rds')
write.csv(Microglia_Shk_vs_PBS.GO_UP.result, file = 'Microglia_Shk_vs_PBS.GO_UP.result.csv')
saveRDS(Microglia_Shk_vs_PBS.GO_Down.result, file = 'Microglia_Shk_vs_PBS.GO_Down.result.rds')
write.csv(Microglia_Shk_vs_PBS.GO_Down.result, file = 'Microglia_Shk_vs_PBS.GO_Down.result.csv')
## GO plot
library(ggplot2)
library(dplyr)
library(stringr)
# Combine Up and Down GO terms
# Combine Up and Down GO terms
go_combined <- rbind(
Microglia_Shk_vs_PBS.GO_UP.result %>% mutate(Regulation = "Upregulated"),
Microglia_Shk_vs_PBS.GO_Down.result %>% mutate(Regulation = "Downregulated")
)
# Calculate signed -log10(p.adjust)
go_combined <- go_combined %>%
mutate(
log10FDR = -log10(p.adjust),
signed.log10FDR = ifelse(Regulation == "Downregulated", -log10FDR, log10FDR)
)
# Optional: Add gene symbols (up to 3 per term)
go_combined <- go_combined %>%
mutate(
GeneSymbols = sapply(strsplit(geneID, "/"), function(g) {
unique_genes <- unique(g)
paste(head(unique_genes, 3), collapse = ", ")
})
)
# Select top 5 from each direction
top_go_plot_data <- go_combined %>%
group_by(Regulation) %>%
slice_max(order_by = abs(signed.log10FDR), n = 6) %>%
ungroup()
# Order GO terms to show Up on top, Down below
top_go_plot_data <- top_go_plot_data %>%
arrange(desc(Regulation), desc(signed.log10FDR))
top_go_plot_data$Description <- factor(
top_go_plot_data$Description,
levels = rev(top_go_plot_data$Description)
)
# Plot
plot_go <- ggplot(top_go_plot_data, aes(x = signed.log10FDR, y = Description, fill = Regulation)) +
geom_bar(stat = "identity", width = 0.7) +
scale_fill_manual(values = c("Upregulated" = "red", "Downregulated" = "blue")) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
theme_minimal(base_size = 14) +
theme(
panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
panel.grid = element_blank(),
axis.text.y = element_text(size = 14),
axis.title = element_text(face = "bold"),
plot.title = element_text(face = "bold", hjust = 0.5),
legend.position = "right"
) +
labs(
title = "GO:Microglia Shk vs.PBS",
x = expression(-log[10](AdjP~value)),
y = "GO Term"
) +
xlim(-10, 10) # fixed symmetric axis
# Show plot
plot_go

# Save as PDF
ggsave("GO.Microglia Shk vs.PBS.pdf", plot = plot_go, width = 10, height = 6)
write.csv(top_go_plot_data, file = 'Top.GO.Microglia.Shk.vs.PBS.csv')
#go_combined
write.csv(go_combined, file = 'GO_combined.Microglia.Shk.vs.PBS.csv')
library(readr)
Oligodendrocyte_Pap1_vs_Control <- read_csv("~/Desktop/PAP1.Celltype.DEG/Oligodendrocyte_Pap1_vs_Control.csv")
## New names:
## Rows: 2494 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): name, diffexpressed, Expression.Padjust dbl (7): ...1, pval, adj_pval,
## f_statistic, df1, df2, lfc
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
table(Oligodendrocyte_Pap1_vs_Control$Expression.Padjust)
##
## DOWN NO UP
## 343 1797 354
Oligodendrocyte_Pap1_vs_Control.UP <- subset( Oligodendrocyte_Pap1_vs_Control, Expression.Padjust == "UP")
Oligodendrocyte_Pap1_vs_Control.UP <- Oligodendrocyte_Pap1_vs_Control.UP$name
Oligodendrocyte_Pap1_vs_Control.DOWN <- subset(Oligodendrocyte_Pap1_vs_Control, Expression.Padjust == "DOWN")
Oligodendrocyte_Pap1_vs_Control.DOWN <- Oligodendrocyte_Pap1_vs_Control.DOWN$name
## GO Enrichment for Microglia UP (PAP1 vs. CONTROL)
go_up <- enrichGO(
gene = Oligodendrocyte_Pap1_vs_Control.UP,
OrgDb = org.Mm.eg.db,
keyType = "SYMBOL", # <- Correct keyType
ont = "ALL", # Ontologies: BP, MF, CC
pAdjustMethod = "BH",
qvalueCutoff = 1, # No filtering yet
readable = FALSE
)
go_down <- enrichGO(
gene = Oligodendrocyte_Pap1_vs_Control.DOWN,
OrgDb = org.Mm.eg.db,
keyType = "SYMBOL",
ont = "ALL",
pAdjustMethod = "BH",
qvalueCutoff = 1,
readable = FALSE
)
# Extract and label results — use -log10(p.adjust) instead of NES
Oligodendrocyte_Pap1_vs_Control.GO_UP.result <- go_up@result
Oligodendrocyte_Pap1_vs_Control.GO_UP.result$logFDR <- -log10(Oligodendrocyte_Pap1_vs_Control.GO_UP.result$p.adjust)
Oligodendrocyte_Pap1_vs_Control.GO_UP.result$Expression <- "UP"
Oligodendrocyte_Pap1_vs_Control.GO_UP.result$Type <- "Shk.vs.PBS"
Oligodendrocyte_Pap1_vs_Control.GO_Down.result <- go_down@result
Oligodendrocyte_Pap1_vs_Control.GO_Down.result$logFDR <- -log10(Oligodendrocyte_Pap1_vs_Control.GO_Down.result$p.adjust)
Oligodendrocyte_Pap1_vs_Control.GO_Down.result$Expression <- "DOWN"
Oligodendrocyte_Pap1_vs_Control.GO_Down.result$Type <- "Shk.vs.PBS"
saveRDS(Oligodendrocyte_Pap1_vs_Control.GO_UP.result, file = 'Oligodendrocyte_Pap1_vs_Control.GO_UP.result.rds')
write.csv(Oligodendrocyte_Pap1_vs_Control.GO_UP.result, file = 'Oligodendrocyte_Pap1_vs_Control.GO_UP.result.csv')
saveRDS(Oligodendrocyte_Pap1_vs_Control.GO_Down.result, file = 'Oligodendrocyte_Pap1_vs_Control.GO_Down.result.rds')
write.csv(Oligodendrocyte_Pap1_vs_Control.GO_Down.result, file = 'Oligodendrocyte_Pap1_vs_Control.GO_Down.result.csv')
## GO plot
library(ggplot2)
library(dplyr)
library(stringr)
# Combine Up and Down GO terms
go_combined <- rbind(
Oligodendrocyte_Pap1_vs_Control.GO_UP.result %>% mutate(Regulation = "Upregulated"),
Oligodendrocyte_Pap1_vs_Control.GO_Down.result %>% mutate(Regulation = "Downregulated")
)
# Calculate signed -log10(p.adjust)
go_combined <- go_combined %>%
mutate(
log10FDR = -log10(p.adjust),
signed.log10FDR = ifelse(Regulation == "Downregulated", -log10FDR, log10FDR)
)
# Optional: Add gene symbols (up to 3 per term)
go_combined <- go_combined %>%
mutate(
GeneSymbols = sapply(strsplit(geneID, "/"), function(g) {
unique_genes <- unique(g)
paste(head(unique_genes, 3), collapse = ", ")
})
)
# Select top 5 from each direction
top_go_plot_data <- go_combined %>%
group_by(Regulation) %>%
slice_max(order_by = abs(signed.log10FDR), n = 6) %>%
ungroup()
# Order GO terms to show Up on top, Down below
top_go_plot_data <- top_go_plot_data %>%
arrange(desc(Regulation), desc(signed.log10FDR))
#top_go_plot_data$Description <- factor(
# top_go_plot_data$Description,
#levels = rev(top_go_plot_data$Description))
# Plot
plot_go <- ggplot(top_go_plot_data, aes(x = signed.log10FDR, y = Description, fill = Regulation)) +
geom_bar(stat = "identity", width = 0.7) +
scale_fill_manual(values = c("Upregulated" = "red", "Downregulated" = "blue")) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
theme_minimal(base_size = 14) +
theme(
panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
panel.grid = element_blank(),
axis.text.y = element_text(size = 14),
axis.title = element_text(face = "bold"),
plot.title = element_text(face = "bold", hjust = 0.5),
legend.position = "right"
) +
labs(
title = "GO:Oligodendroctes PAP1 vs.Control",
x = expression(-log[10](AdjP~value)),
y = "GO Term"
) +
xlim(-20, 20) # fixed symmetric axis
# Show plot
plot_go

# Save as PDF
ggsave("GO.Oligodendroctes.PAP1.vs.Control.pdf", plot = plot_go, width = 10, height = 6)
write.csv(top_go_plot_data, file = 'Top.GO.Oligodendroctes.PAP1.vs.Control.csv')
#go_combined
write.csv(go_combined, file = 'GO_combined.Oligodendroctes.PAP1.vs.Control.csv')