METRS.PULLDOWN

library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.3.3

## Warning: package 'purrr' was built under R version 4.3.3

## Warning: package 'lubridate' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(dplyr)
library(gplots)

## Warning: package 'gplots' was built under R version 4.3.3

## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess

library(ggrepel)

## Warning: package 'ggrepel' was built under R version 4.3.3

library(pheatmap)
library(variancePartition)

## Warning: package 'variancePartition' was built under R version 4.3.2

## Loading required package: limma
## Loading required package: BiocParallel
## 
## Attaching package: 'variancePartition'
## 
## The following object is masked from 'package:limma':
## 
##     topTable

library(limma)
library(BiocParallel)
library(M3C)
library(ggplot2)

MetRS_Pulldown_Metadata <- readRDS("~/Desktop/METRS.PULLDOWN.2025/MetRS_Pulldown_Metadata.rds")
Aldh1_MetRs_Input <- readRDS("~/Desktop/METRS.PULLDOWN.2025/Aldh1_MetRs_Input.rds")
dim(Aldh1_MetRs_Input)

## [1] 4039   11

colnames(Aldh1_MetRs_Input)==row.names(MetRS_Pulldown_Metadata)

##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

##Boxplot before Normalization
# Load necessary library
library(ggplot2)
# Create a vector of colors
colors <- c("red", "green", "blue", "purple", "orange", "yellow", "pink", "cyan")  # Add as many colors as needed
# Assign a unique color to each condition
conditionNames <- unique(MetRS_Pulldown_Metadata$Group)
colorVector <- setNames(colors[1:length(conditionNames)], conditionNames)
# Create a color vector for the samples
statusCol <- colorVector[MetRS_Pulldown_Metadata$Group]
# Set margin sizes (bottom, left, top, right)
par(mar = c(10, 4, 4, 2) + 0.1)
# Check distributions of samples using boxplots
boxplot(Aldh1_MetRs_Input,
xlab="",
ylab="Aldh1_MetRs",
las=2,
col=statusCol,
main="Sample Distributions by Group before Normalization")
# Let's add a blue horizontal line that corresponds to the median logCPM
abline(h=median(as.matrix(Aldh1_MetRs_Input)), col="blue")

library(preprocessCore)
Aldh1_MetRs_Input.norm <- as.matrix(Aldh1_MetRs_Input)
Aldh1_MetRs_Input.norm <- normalize.quantiles(Aldh1_MetRs_Input.norm)
colnames(Aldh1_MetRs_Input.norm) <- colnames(Aldh1_MetRs_Input)
row.names(Aldh1_MetRs_Input.norm) <- row.names(Aldh1_MetRs_Input)
#boxplot after normalization
library(ggplot2)
# Create a vector of colors
colors <- c("red", "green", "blue", "purple", "orange", "yellow", "pink", "cyan")  # Add as many colors as needed
# Assign a unique color to each condition
conditionNames <- unique(MetRS_Pulldown_Metadata$Group)
colorVector <- setNames(colors[1:length(conditionNames)], conditionNames)
# Create a color vector for the samples
statusCol <- colorVector[MetRS_Pulldown_Metadata$Group]
# Set margin sizes (bottom, left, top, right)
par(mar = c(10, 4, 4, 2) + 0.1)
# Check distributions of samples using boxplots
boxplot(Aldh1_MetRs_Input.norm,
xlab="",
ylab="Aldh1_MetRs.norm",
las=2,
col=statusCol,
main="Sample Distributions by Group after Normalization")
# Let's add a blue horizontal line that corresponds to the median logCPM
abline(h=median(as.matrix(Aldh1_MetRs_Input.norm)), col="blue")

#Aldh1.MetRs.Input.BCCAO.Sham <- readRDS("~/Desktop/METRS.PULLDOWN.2025/Aldh1.MetRs.Input.BCCAO.Sham.rds")
MetRS.Metadata.BCCAO.SHAM <- readRDS("~/Desktop/METRS.PULLDOWN.2025/MetRS.Metadata.BCCAO.SHAM.rds")
# Get the sample names from the metadata
samples_to_keep <- rownames(MetRS.Metadata.BCCAO.SHAM)
# Subset the matrix to include only these samples (columns)
Aldh1_MetRs_Input.BCCAO.SHAM <- Aldh1_MetRs_Input[, samples_to_keep]
colnames(Aldh1_MetRs_Input.BCCAO.SHAM)==row.names(MetRS.Metadata.BCCAO.SHAM)

## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

cleanDat  <- Aldh1_MetRs_Input.BCCAO.SHAM
#cleanDat <- as.matrix(cleanDat)
#cleanDat <- normalize.quantiles(cleanDat)
#colnames(cleanDat ) <- colnames(Aldh1.MetRs.Input.BCCAO.Sham)
#row.names(cleanDat) <- row.names(Aldh1.MetRs.Input.BCCAO.Sham)
numericMeta <- MetRS.Metadata.BCCAO.SHAM
numericMeta$Group <- numericMeta$Group
condition <- numericMeta$Group
Grouping <- numericMeta$Group
source("/Users/us74/Desktop/2025/parANOVA.dex.R")
ANOVAout <- parANOVA.dex()

## - parallelThreads variable not set. Running with 2 threads only.
## - Network color assignment vector not supplied or not of length in rows of cleanDat; will not be included in output table and data frame.
## - twoGroupCorrMethod variable not set to a correction method for p.adjust when only 2 groups of samples specified in Grouping. Using Benjamini Hochberg 'BH' FDR.
## - fallbackIfSmallTukeyP variable not set. Using recommended Bonferroni t-test FDR for unreliable Tukey p values <10^-8.5

## 
## Attaching package: 'foreach'

## The following objects are masked from 'package:purrr':
## 
##     accumulate, when

## 
## ...Tukey p<10^-8.5 Fallback calculations using Bonferroni corrected T test: 0 [0%]

labelTop <- 20
FCmin= 0
flip = -3
signifP=0.05
plotVolc()

## - No comparison p value columns selected in selectComps. Using ALL comparisons.
##  - selectComps may not reference valid integer p value column indexes of ANOVAout (or CORout).
##    Output will be for all comparisons or correlation(s).
## - useNETcolors not set or not TRUE/FALSE. NETcolors not found in ANOVAout, so 3 color volcano(es) will be drawn.
## - Variable outputfigs not specified. Saving volcano plots to /Users/us74/Downloads .
## 
## ...Thresholds:
##    [x] Applying a 0% minimum fold change threshold at + and - x=0 .
##    [y] with minimum significance cutoff p < 0.05, equivalent to -log10(p) y=1.3 .
## 
## Processing ANOVA column 3 (Aldh1l1MetRS.Sham vs Aldh1l1MetRS.BCCAO) for volcano ...

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Generating PDF volcano for ANOVAout column 3 (Aldh1l1MetRS.Sham vs Aldh1l1MetRS.BCCAO) ...

## Generating HTML volcano for ANOVAout column 3 (Aldh1l1MetRS.Sham vs Aldh1l1MetRS.BCCAO) ...

## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

diffEx_Input_BCCAO_SHAM.97.128 <- readRDS("~/Desktop/MetRS.062025/diffEx_Input_BCCAO_SHAM.97.128.rds")
table(diffEx_Input_BCCAO_SHAM.97.128$Expression)

## 
## Down  NOT   UP 
##   97 3814  128

# Load required libraries
library(clusterProfiler)

## Warning: package 'clusterProfiler' was built under R version 4.3.3

##

## clusterProfiler v4.10.1  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141

## 
## Attaching package: 'clusterProfiler'

## The following object is masked from 'package:purrr':
## 
##     simplify

## The following object is masked from 'package:stats':
## 
##     filter

library(org.Mm.eg.db)  # Use org.Hs.eg.db for human

## Loading required package: AnnotationDbi

## Loading required package: stats4

## Loading required package: BiocGenerics

## 
## Attaching package: 'BiocGenerics'

## The following object is masked from 'package:limma':
## 
##     plotMA

## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union

## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## Loading required package: IRanges

## Loading required package: S4Vectors

## Warning: package 'S4Vectors' was built under R version 4.3.2

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:clusterProfiler':
## 
##     rename

## The following object is masked from 'package:plotly':
## 
##     rename

## The following object is masked from 'package:gplots':
## 
##     space

## The following objects are masked from 'package:lubridate':
## 
##     second, second<-

## The following objects are masked from 'package:dplyr':
## 
##     first, rename

## The following object is masked from 'package:tidyr':
## 
##     expand

## The following object is masked from 'package:utils':
## 
##     findMatches

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

## 
## Attaching package: 'IRanges'

## The following object is masked from 'package:clusterProfiler':
## 
##     slice

## The following object is masked from 'package:plotly':
## 
##     slice

## The following object is masked from 'package:lubridate':
## 
##     %within%

## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice

## The following object is masked from 'package:purrr':
## 
##     reduce

## 
## Attaching package: 'AnnotationDbi'

## The following object is masked from 'package:clusterProfiler':
## 
##     select

## The following object is masked from 'package:plotly':
## 
##     select

## The following object is masked from 'package:dplyr':
## 
##     select

##

library(ggplot2)
#Aldh1_MetRs_Input.Background <- row.names(Aldh1_MetRs_Input)

# ================================
# 1. Prepare Upregulated and Downregulated Gene Lists
diffEx_Input_BCCAO_SHAM.97.128$Protein <- diffEx_Input_BCCAO_SHAM.97.128$...1
# ================================
# Filter based on your 'Expression' column
up_genes <- diffEx_Input_BCCAO_SHAM.97.128$Protein[diffEx_Input_BCCAO_SHAM.97.128$Expression == "UP"]
down_genes <- diffEx_Input_BCCAO_SHAM.97.128$Protein[diffEx_Input_BCCAO_SHAM.97.128$Expression == "Down"]

Aldh1_MetRs_Input.Background <- readRDS("~/Desktop/MetRS.062025/Aldh1_MetRs_Input.Background.rds")

# GO: Biological Process
ego_up_BP <- enrichGO(gene = up_genes,
                      universe = Aldh1_MetRs_Input.Background,
                      OrgDb = org.Mm.eg.db,
                      keyType = "SYMBOL",
                      ont = "BP",
                      pAdjustMethod = "BH",
                      qvalueCutoff = 1,
                      readable = TRUE)

ego_up_BP_df <- as.data.frame(ego_up_BP@result)
ego_up_BP_df$ONTOLOGY <- "BP"


# GO: Molecular Function
ego_up_MF <- enrichGO(gene = up_genes,
                      universe = Aldh1_MetRs_Input.Background,
                      OrgDb = org.Mm.eg.db,
                      keyType = "SYMBOL",
                      ont = "MF",
                      pAdjustMethod = "BH",
                      qvalueCutoff = 1,
                      readable = TRUE)

ego_up_MF_df <- as.data.frame(ego_up_MF@result)
ego_up_MF_df$ONTOLOGY <- "MF"

# GO: Cellular Component
ego_up_CC <- enrichGO(gene = up_genes,
                      universe = Aldh1_MetRs_Input.Background,
                      OrgDb = org.Mm.eg.db,
                      keyType = "SYMBOL",
                      ont = "CC",
                      pAdjustMethod = "BH",
                      qvalueCutoff = 1,
                      readable = TRUE)

ego_up_CC_df <- as.data.frame(ego_up_CC@result)
ego_up_CC_df$ONTOLOGY <- "CC"

# Combine all 3 GO categories
# ================================
combined_GO_UP <- rbind(ego_up_BP_df, ego_up_MF_df, ego_up_CC_df)

# Save to CSV
write.csv(combined_GO_UP, "GO_UP_AllCategories_combined.csv", row.names = FALSE)

# Optional: View top few entries
head(combined_GO_UP)

##                    ID                                  Description GeneRatio
## GO:0070374 GO:0070374 positive regulation of ERK1 and ERK2 cascade     7/119
## GO:0070925 GO:0070925                           organelle assembly    20/119
## GO:0048565 GO:0048565                  digestive tract development     4/119
## GO:2000404 GO:2000404               regulation of T cell migration     4/119
## GO:0055123 GO:0055123                 digestive system development     4/119
## GO:0007034 GO:0007034                           vacuolar transport     8/119
##             BgRatio       pvalue  p.adjust    qvalue
## GO:0070374  48/3849 0.0005816635 0.7978376 0.7958959
## GO:0070925 305/3849 0.0008895092 0.7978376 0.7958959
## GO:0048565  17/3849 0.0015131884 0.7978376 0.7958959
## GO:2000404  18/3849 0.0018994882 0.7978376 0.7958959
## GO:0055123  20/3849 0.0028670530 0.7978376 0.7958959
## GO:0007034  80/3849 0.0029711329 0.7978376 0.7958959
##                                                                                                                             geneID
## GO:0070374                                                                               Pdgfra/Crkl/Egfr/Abl2/Arrb1/Sema7a/Adam17
## GO:0070925 Stx7/Rdx/Pdgfra/Atxn10/Dnm2/Yap1/Crkl/Rps25/Ube2b/Hdac2/Map9/Nup62/Wdr44/Ythdf3/Ablim1/Vps11/Snx4/Dhx30/Eif4enif1/Rpl38
## GO:0048565                                                                                                   Pdgfra/Yap1/Crkl/Egfr
## GO:2000404                                                                                                  Crkl/Abl2/Oxsr1/Adam17
## GO:0055123                                                                                                   Pdgfra/Yap1/Crkl/Egfr
## GO:0007034                                                                   Stx7/Atp6v0d1/Vps13a/Vipas39/Hook1/Pik3r4/Vps11/Vps35
##            Count ONTOLOGY
## GO:0070374     7       BP
## GO:0070925    20       BP
## GO:0048565     4       BP
## GO:2000404     4       BP
## GO:0055123     4       BP
## GO:0007034     8       BP

##GO plot
# Step 1: Add -log10(p-value) column
combined_GO_UP$minusLog10P <- -log10(combined_GO_UP$pvalue)

# Step 2: Select top 10 unique GO terms by p.adjust
top10_GO_UP <- combined_GO_UP[order(combined_GO_UP$p.adjust), ]
top10_GO_UP <- top10_GO_UP[!duplicated(top10_GO_UP$Description), ]
top10_GO_UP <- head(top10_GO_UP, 10)

# Step 3: Plot bar plot using ggplot2
library(ggplot2)
# pdf("GO_UP_Input_BCCAO_SHAM.97.128.pdf", width = 10, height = 6)  # adjust size as needed
# Plot: Count on Y, Pathways on X, fill by -log10(pvalue)
ggplot(top10_GO_UP, aes(x = reorder(Description, Count), y = Count, fill = minusLog10P)) +
    geom_bar(stat = "identity", width = 0.8) +
    coord_flip() +
    scale_fill_gradient(
        low = "red",
        high = "darkred",
        limits = c(0, 1),
        oob = scales::squish  # Squish values outside [0,1] to the boundary
    ) +
    labs(title = "Top-10 Enriched GO Terms (UPregulated)",
         x = "GO Term",
         y = "Gene Count",
         fill = "-log10(p-value)") +
    theme_minimal(base_size = 12) +
    theme(axis.text.y = element_text(size = 10))

#dev.off()

write.csv(combined_GO_UP, file = 'GO.UP.Input_BCCAO_SHAM.97.128.csv')

write.csv(top10_GO_UP, file = 'GO.TOP.UP.Input_BCCAO_SHAM.97.128.csv')

# 1. Prepare Downregulated Gene List
diffEx_Input_BCCAO_SHAM.97.128$Protein <- diffEx_Input_BCCAO_SHAM.97.128$...1
down_genes <- diffEx_Input_BCCAO_SHAM.97.128$Protein[diffEx_Input_BCCAO_SHAM.97.128$Expression == "Down"]

# 2. Run GO Enrichment for BP, MF, CC
library(clusterProfiler)
library(org.Mm.eg.db)

# GO: BP
ego_down_BP <- enrichGO(gene = down_genes,
                        universe = Aldh1_MetRs_Input.Background,
                        OrgDb = org.Mm.eg.db,
                        keyType = "SYMBOL",
                        ont = "BP",
                        pAdjustMethod = "BH",
                        qvalueCutoff = 1,
                        readable = TRUE)
ego_down_BP_df <- as.data.frame(ego_down_BP@result)
ego_down_BP_df$ONTOLOGY <- "BP"

# GO: MF
ego_down_MF <- enrichGO(gene = down_genes,
                        universe = Aldh1_MetRs_Input.Background,
                        OrgDb = org.Mm.eg.db,
                        keyType = "SYMBOL",
                        ont = "MF",
                        pAdjustMethod = "BH",
                        qvalueCutoff = 1,
                        readable = TRUE)
ego_down_MF_df <- as.data.frame(ego_down_MF@result)
ego_down_MF_df$ONTOLOGY <- "MF"

# GO: CC
ego_down_CC <- enrichGO(gene = down_genes,
                        universe = Aldh1_MetRs_Input.Background,
                        OrgDb = org.Mm.eg.db,
                        keyType = "SYMBOL",
                        ont = "CC",
                        pAdjustMethod = "BH",
                        qvalueCutoff = 1,
                        readable = TRUE)
ego_down_CC_df <- as.data.frame(ego_down_CC@result)
ego_down_CC_df$ONTOLOGY <- "CC"

# Combine all GO categories
combined_GO_DOWN <- rbind(ego_down_BP_df, ego_down_MF_df, ego_down_CC_df)

# Save combined GO results to CSV
write.csv(combined_GO_DOWN, file = 'GO.DOWN.Input_BCCAO_SHAM.97.128.csv', row.names = FALSE)

# 3. Prepare Top 10 Plot Data
combined_GO_DOWN$minusLog10P <- -log10(combined_GO_DOWN$pvalue)

top10_GO_DOWN <- combined_GO_DOWN[order(combined_GO_DOWN$p.adjust), ]
top10_GO_DOWN <- top10_GO_DOWN[!duplicated(top10_GO_DOWN$Description), ]
top10_GO_DOWN <- head(top10_GO_DOWN, 10)

write.csv(top10_GO_DOWN, file = 'GO.TOP.DOWN.Input_BCCAO_SHAM.97.128.csv', row.names = FALSE)

# 4. Plot Top 10 GO Terms (Downregulated)
library(ggplot2)

# pdf("GO_DOWN_Input_BCCAO_SHAM.97.128.pdf", width = 10, height = 6)

ggplot(top10_GO_DOWN, aes(x = reorder(Description, Count), y = Count, fill = minusLog10P)) +
  geom_bar(stat = "identity", width = 0.8) +
  coord_flip() +
  scale_fill_gradient(
    low = "blue",
    high = "darkblue",
    limits = c(0, 1),
    oob = scales::squish
  ) +
  labs(title = "Top-10 Enriched GO Terms (DOWNregulated)",
       x = "GO Term",
       y = "Gene Count",
       fill = "-log10(p-value)") +
  theme_minimal(base_size = 12) +
  theme(axis.text.y = element_text(size = 10))

#dev.off()

#Aldh1_MetRs_Sham_Aldh1l1 <- read.csv("Aldh1.MetRs.Sham.Aldh1l1.csv", row.names = 1)
Aldh1_MetRs_Sham_Aldh1l1.before.Normalization <- readRDS("~/Desktop/METRS.PULLDOWN.2025/Aldh1_MetRs_Sham_Aldh1l1.before.Normalization.rds")
MetRS_metadata_Sham_aldh1l1 <- readRDS("~/Desktop/METRS.PULLDOWN.2025/MetRS_metadata_Sham_aldh1l1.rds")
cleanDat  <- Aldh1_MetRs_Sham_Aldh1l1.before.Normalization
#cleanDat <- as.matrix(cleanDat)
#cleanDat <- normalize.quantiles(cleanDat)
#colnames(cleanDat ) <- colnames(Aldh1_MetRs_Sham_Aldh1l1.before.Normalization)
#row.names(cleanDat) <- row.names(Aldh1_MetRs_Sham_Aldh1l1.before.Normalization)
numericMeta <- MetRS_metadata_Sham_aldh1l1
numericMeta$Group <- numericMeta$Group
condition <- numericMeta$Group
Grouping <- numericMeta$Group
source("/Users/us74/Desktop/2025/parANOVA.dex.R")
ANOVAout <- parANOVA.dex()

## - parallelThreads variable not set. Running with 2 threads only.
## - Network color assignment vector not supplied or not of length in rows of cleanDat; will not be included in output table and data frame.
## - twoGroupCorrMethod variable not set to a correction method for p.adjust when only 2 groups of samples specified in Grouping. Using Benjamini Hochberg 'BH' FDR.
## - fallbackIfSmallTukeyP variable not set. Using recommended Bonferroni t-test FDR for unreliable Tukey p values <10^-8.5
## 
## ...Tukey p<10^-8.5 Fallback calculations using Bonferroni corrected T test: 0 [0%]

labelTop <- 20
FCmin= 0
flip = -3
signifP=0.05
plotVolc()

## - No comparison p value columns selected in selectComps. Using ALL comparisons.
##  - selectComps may not reference valid integer p value column indexes of ANOVAout (or CORout).
##    Output will be for all comparisons or correlation(s).
## - useNETcolors not set or not TRUE/FALSE. NETcolors not found in ANOVAout, so 3 color volcano(es) will be drawn.
## - Variable outputfigs not specified. Saving volcano plots to /Users/us74/Downloads .
## 
## ...Thresholds:
##    [x] Applying a 0% minimum fold change threshold at + and - x=0 .
##    [y] with minimum significance cutoff p < 0.05, equivalent to -log10(p) y=1.3 .
## 
## Processing ANOVA column 3 (Aldh1l1MetRS.Sham vs Aldh1l1MetRS) for volcano ...

## Generating PDF volcano for ANOVAout column 3 (Aldh1l1MetRS.Sham vs Aldh1l1MetRS) ...

## Generating HTML volcano for ANOVAout column 3 (Aldh1l1MetRS.Sham vs Aldh1l1MetRS) ...

## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331 <- readRDS("~/Desktop/MetRS.062025/diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331.rds")
table(diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Expression)

## 
## Down  NOT   UP 
##  331 3409  296

# 1. Prepare gene list
diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Protein <- diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$...1

up_genes <- diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Protein[
  diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Expression == "UP"
]

down_genes <- diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Protein[
  diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Expression == "Down"
]

####
# 1. Prepare Upregulated Gene List
diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Protein <- diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$...1

up_genes <- diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Protein[
  diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Expression == "UP"
]

# 2. GO Enrichment

# GO: Biological Process
ego_up_BP <- enrichGO(gene = up_genes,
                      universe = Aldh1_MetRs_Input.Background,
                      OrgDb = org.Mm.eg.db,
                      keyType = "SYMBOL",
                      ont = "BP",
                      pAdjustMethod = "BH",
                      qvalueCutoff = 1,
                      readable = TRUE)

ego_up_BP_df <- as.data.frame(ego_up_BP@result)
ego_up_BP_df$ONTOLOGY <- "BP"

# GO: Molecular Function
ego_up_MF <- enrichGO(gene = up_genes,
                      universe = Aldh1_MetRs_Input.Background,
                      OrgDb = org.Mm.eg.db,
                      keyType = "SYMBOL",
                      ont = "MF",
                      pAdjustMethod = "BH",
                      qvalueCutoff = 1,
                      readable = TRUE)

ego_up_MF_df <- as.data.frame(ego_up_MF@result)
ego_up_MF_df$ONTOLOGY <- "MF"

# GO: Cellular Component
ego_up_CC <- enrichGO(gene = up_genes,
                      universe = Aldh1_MetRs_Input.Background,
                      OrgDb = org.Mm.eg.db,
                      keyType = "SYMBOL",
                      ont = "CC",
                      pAdjustMethod = "BH",
                      qvalueCutoff = 1,
                      readable = TRUE)

ego_up_CC_df <- as.data.frame(ego_up_CC@result)
ego_up_CC_df$ONTOLOGY <- "CC"

# Combine all 3 GO categories
# ================================
combined_GO_UP <- rbind(ego_up_BP_df, ego_up_MF_df, ego_up_CC_df)

# Save to CSV
write.csv(combined_GO_UP, "GO_UP_Aldh1l1MetRS_296_331_combined.csv", row.names = FALSE)

# Optional: View top few entries
head(combined_GO_UP)

##                    ID                          Description GeneRatio  BgRatio
## GO:0099536 GO:0099536                   synaptic signaling    57/280 392/3849
## GO:0007268 GO:0007268       chemical synaptic transmission    54/280 372/3849
## GO:0098916 GO:0098916 anterograde trans-synaptic signaling    54/280 372/3849
## GO:0099537 GO:0099537             trans-synaptic signaling    54/280 375/3849
## GO:0007269 GO:0007269           neurotransmitter secretion    22/280  99/3849
## GO:0099643 GO:0099643          signal release from synapse    22/280  99/3849
##                  pvalue     p.adjust       qvalue
## GO:0099536 9.218917e-08 0.0002028745 0.0001938913
## GO:0007268 2.312011e-07 0.0002028745 0.0001938913
## GO:0098916 2.312011e-07 0.0002028745 0.0001938913
## GO:0099537 3.037044e-07 0.0002028745 0.0001938913
## GO:0007269 1.373727e-06 0.0006117665 0.0005846775
## GO:0099643 1.373727e-06 0.0006117665 0.0005846775
##                                                                                                                                                                                                                                                                                                                                                                       geneID
## GO:0099536 Shank1/Prrt2/Exoc4/Mgll/Gucy1b1/Ppt1/Pak1/Lin7b/Cck/Camk2a/Cdh2/Prkca/Napb/Ptk2/Cacnb3/Nrgn/Snap25/Ywhag/Pfn1/Vamp2/Prkcg/Prkcb/Ywhah/Ica1/Adgrb1/Sorbs2/Kpna1/Snap91/Sh3gl1/Dgkb/Dlgap3/Erc2/Phf24/Rimbp2/Dgkz/Shank2/Anks1b/Dlgap2/Chmp2b/Baiap2/Sipa1l1/Trim9/Synpo/Lin7a/Plcl2/Itpka/Cplx3/Eif4a3/Dlgap1/Napa/Snap29/Gucy1a1/Pfn2/Map1a/Atp2b2/Septin5/Homer1
## GO:0007268                  Shank1/Prrt2/Exoc4/Mgll/Gucy1b1/Ppt1/Pak1/Lin7b/Camk2a/Cdh2/Prkca/Napb/Ptk2/Cacnb3/Nrgn/Snap25/Ywhag/Pfn1/Vamp2/Prkcg/Prkcb/Ywhah/Ica1/Adgrb1/Snap91/Sh3gl1/Dgkb/Dlgap3/Erc2/Phf24/Rimbp2/Dgkz/Shank2/Anks1b/Dlgap2/Chmp2b/Baiap2/Sipa1l1/Trim9/Synpo/Lin7a/Plcl2/Itpka/Cplx3/Eif4a3/Dlgap1/Napa/Snap29/Gucy1a1/Pfn2/Map1a/Atp2b2/Septin5/Homer1
## GO:0098916                  Shank1/Prrt2/Exoc4/Mgll/Gucy1b1/Ppt1/Pak1/Lin7b/Camk2a/Cdh2/Prkca/Napb/Ptk2/Cacnb3/Nrgn/Snap25/Ywhag/Pfn1/Vamp2/Prkcg/Prkcb/Ywhah/Ica1/Adgrb1/Snap91/Sh3gl1/Dgkb/Dlgap3/Erc2/Phf24/Rimbp2/Dgkz/Shank2/Anks1b/Dlgap2/Chmp2b/Baiap2/Sipa1l1/Trim9/Synpo/Lin7a/Plcl2/Itpka/Cplx3/Eif4a3/Dlgap1/Napa/Snap29/Gucy1a1/Pfn2/Map1a/Atp2b2/Septin5/Homer1
## GO:0099537                  Shank1/Prrt2/Exoc4/Mgll/Gucy1b1/Ppt1/Pak1/Lin7b/Camk2a/Cdh2/Prkca/Napb/Ptk2/Cacnb3/Nrgn/Snap25/Ywhag/Pfn1/Vamp2/Prkcg/Prkcb/Ywhah/Ica1/Adgrb1/Snap91/Sh3gl1/Dgkb/Dlgap3/Erc2/Phf24/Rimbp2/Dgkz/Shank2/Anks1b/Dlgap2/Chmp2b/Baiap2/Sipa1l1/Trim9/Synpo/Lin7a/Plcl2/Itpka/Cplx3/Eif4a3/Dlgap1/Napa/Snap29/Gucy1a1/Pfn2/Map1a/Atp2b2/Septin5/Homer1
## GO:0007269                                                                                                                                                                                                                               Prrt2/Ppt1/Pak1/Lin7b/Camk2a/Prkca/Napb/Snap25/Vamp2/Prkcg/Prkcb/Ica1/Snap91/Erc2/Rimbp2/Trim9/Lin7a/Cplx3/Napa/Snap29/Pfn2/Septin5
## GO:0099643                                                                                                                                                                                                                               Prrt2/Ppt1/Pak1/Lin7b/Camk2a/Prkca/Napb/Snap25/Vamp2/Prkcg/Prkcb/Ica1/Snap91/Erc2/Rimbp2/Trim9/Lin7a/Cplx3/Napa/Snap29/Pfn2/Septin5
##            Count ONTOLOGY
## GO:0099536    57       BP
## GO:0007268    54       BP
## GO:0098916    54       BP
## GO:0099537    54       BP
## GO:0007269    22       BP
## GO:0099643    22       BP

## GO plot
# Step 1: Add -log10(p-value) column
combined_GO_UP$minusLog10P <- -log10(combined_GO_UP$pvalue)

# Step 2: Select top 10 unique GO terms by p.adjust
top10_GO_UP <- combined_GO_UP[order(combined_GO_UP$p.adjust), ]
top10_GO_UP <- top10_GO_UP[!duplicated(top10_GO_UP$Description), ]
top10_GO_UP <- head(top10_GO_UP, 10)

# Step 3: Plot bar plot using ggplot2
library(ggplot2)
#pdf("GO_UP_Aldh1l1MetRS_296_331.pdf", width = 10, height = 6)  # adjust size as needed

# Plot: Count on Y, Pathways on X, fill by -log10(pvalue)
ggplot(top10_GO_UP, aes(x = reorder(Description, Count), y = Count, fill = minusLog10P)) +
    geom_bar(stat = "identity", width = 0.8) +
    coord_flip() +
    scale_fill_gradient(
        low = "red",
        high = "darkred",
        limits = c(0, 1),
        oob = scales::squish  # Squish values outside [0,1] to the boundary
    ) +
    labs(title = "Top-10 Enriched GO Terms (UPregulated)",
         x = "GO Term",
         y = "Gene Count",
         fill = "-log10(p-value)") +
    theme_minimal(base_size = 12) +
    theme(axis.text.y = element_text(size = 10))

#dev.off()

# Save tables
write.csv(combined_GO_UP, file = 'GO.UP.Aldh1l1MetRS_296_331.csv')
write.csv(top10_GO_UP, file = 'GO.TOP.UP.Aldh1l1MetRS_296_331.csv')

# 1. Prepare Downregulated Gene List
diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Protein <- diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$...1

down_genes <- diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Protein[
  diffEx_Aldh1l1MetRS_Sham_vs_Aldh1l1MetRS.296.331$Expression == "Down"
]

# 2. GO Enrichment

# GO: Biological Process
ego_down_BP <- enrichGO(gene = down_genes,
                        universe = Aldh1_MetRs_Input.Background,
                        OrgDb = org.Mm.eg.db,
                        keyType = "SYMBOL",
                        ont = "BP",
                        pAdjustMethod = "BH",
                        qvalueCutoff = 1,
                        readable = TRUE)

ego_down_BP_df <- as.data.frame(ego_down_BP@result)
ego_down_BP_df$ONTOLOGY <- "BP"

# GO: Molecular Function
ego_down_MF <- enrichGO(gene = down_genes,
                        universe = Aldh1_MetRs_Input.Background,
                        OrgDb = org.Mm.eg.db,
                        keyType = "SYMBOL",
                        ont = "MF",
                        pAdjustMethod = "BH",
                        qvalueCutoff = 1,
                        readable = TRUE)

ego_down_MF_df <- as.data.frame(ego_down_MF@result)
ego_down_MF_df$ONTOLOGY <- "MF"

# GO: Cellular Component
ego_down_CC <- enrichGO(gene = down_genes,
                        universe = Aldh1_MetRs_Input.Background,
                        OrgDb = org.Mm.eg.db,
                        keyType = "SYMBOL",
                        ont = "CC",
                        pAdjustMethod = "BH",
                        qvalueCutoff = 1,
                        readable = TRUE)

ego_down_CC_df <- as.data.frame(ego_down_CC@result)
ego_down_CC_df$ONTOLOGY <- "CC"

# Combine all 3 GO categories
# ================================
combined_GO_DOWN <- rbind(ego_down_BP_df, ego_down_MF_df, ego_down_CC_df)

# Save to CSV
write.csv(combined_GO_DOWN, "GO_DOWN_Aldh1l1MetRS_296_331_combined.csv", row.names = FALSE)

# Optional: View top few entries
head(combined_GO_DOWN)

##                    ID                          Description GeneRatio BgRatio
## GO:0042133 GO:0042133   neurotransmitter metabolic process     6/321 10/3849
## GO:0035725 GO:0035725   sodium ion transmembrane transport    13/321 55/3849
## GO:0006814 GO:0006814                 sodium ion transport    15/321 70/3849
## GO:0006883 GO:0006883 intracellular sodium ion homeostasis     6/321 14/3849
## GO:0009988 GO:0009988                cell-cell recognition     7/321 19/3849
## GO:0055078 GO:0055078               sodium ion homeostasis     6/321 15/3849
##                  pvalue  p.adjust    qvalue
## GO:0042133 5.060152e-05 0.1405204 0.1405204
## GO:0035725 4.290033e-04 0.3050968 0.3050968
## GO:0006814 4.927148e-04 0.3050968 0.3050968
## GO:0006883 5.419573e-04 0.3050968 0.3050968
## GO:0009988 5.493280e-04 0.3050968 0.3050968
## GO:0055078 8.405293e-04 0.3292642 0.3292642
##                                                                                                           geneID
## GO:0042133                                                                      Gad1/Gad2/Maoa/Maob/Hnmt/Aldh9a1
## GO:0035725                  Slc4a4/Hcn2/Atp1b2/Slc6a11/Slc12a2/Slc8a1/Atp1b3/Atp1a2/Tpcn2/Slc4a8/Tesc/Nos1/Stk39
## GO:0006814 Slc4a4/Hcn2/Atp1b2/Slc6a11/Slc12a2/Slc8a1/Atp1b3/Serpine2/Atp1a2/Tpcn2/Slc4a8/Slc13a1/Tesc/Nos1/Stk39
## GO:0006883                                                              Atp1b2/Slc12a2/Slc8a1/Atp1b3/Atp1a2/Tesc
## GO:0009988                                                                  Hspa1l/Msn/Cd81/Cct2/Cct5/Cct6a/Cct3
## GO:0055078                                                              Atp1b2/Slc12a2/Slc8a1/Atp1b3/Atp1a2/Tesc
##            Count ONTOLOGY
## GO:0042133     6       BP
## GO:0035725    13       BP
## GO:0006814    15       BP
## GO:0006883     6       BP
## GO:0009988     7       BP
## GO:0055078     6       BP

## GO plot
# Step 1: Add -log10(p-value) column
combined_GO_DOWN$minusLog10P <- -log10(combined_GO_DOWN$pvalue)

# Step 2: Select top 10 unique GO terms by p.adjust
top10_GO_DOWN <- combined_GO_DOWN[order(combined_GO_DOWN$p.adjust), ]
top10_GO_DOWN <- top10_GO_DOWN[!duplicated(top10_GO_DOWN$Description), ]
top10_GO_DOWN <- head(top10_GO_DOWN, 10)

# Step 3: Plot bar plot using ggplot2
library(ggplot2)
#pdf("GO_DOWN_Aldh1l1MetRS_296_331.pdf", width = 10, height = 6)  # adjust size as needed

# Plot: Count on Y, Pathways on X, fill by -log10(pvalue)
ggplot(top10_GO_DOWN, aes(x = reorder(Description, Count), y = Count, fill = minusLog10P)) +
    geom_bar(stat = "identity", width = 0.8) +
    coord_flip() +
    scale_fill_gradient(
        low = "blue",
        high = "darkblue",
        limits = c(0, 1),
        oob = scales::squish  # Squish values outside [0,1] to the boundary
    ) +
    labs(title = "Top-10 Enriched GO Terms (DOWNregulated)",
         x = "GO Term",
         y = "Gene Count",
         fill = "-log10(p-value)") +
    theme_minimal(base_size = 12) +
    theme(axis.text.y = element_text(size = 10))

#dev.off()

# Save tables
write.csv(combined_GO_DOWN, file = 'GO.DOWN.Aldh1l1MetRS_296_331.csv')
write.csv(top10_GO_DOWN, file = 'GO.TOP.DOWN.Aldh1l1MetRS_296_331.csv')

METRS.PULLDOWN

Upasna Srivastava

2025-05-12