#Gene Ontology

library(org.Mm.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
##     tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
library(clusterProfiler)
## 
## clusterProfiler v4.12.0  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:stats':
## 
##     filter
library(enrichplot)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(DOSE)
## DOSE v3.30.1  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
# Read the differential-expression table; the filters below use its
# avg_log2FC and p_val_adj columns.
input <- read.csv("Nr2f6_GFP_DEG.csv", header = TRUE)

# `filter` is masked several times in this session (stats, clusterProfiler,
# dplyr), so the dplyr verb is called explicitly. Rows where either column is
# NA are dropped by dplyr::filter().

# Upregulated genes: log2 fold change of at least 1 and adjusted p <= 0.05
GO_up <- input %>%
  dplyr::filter(avg_log2FC >= 1, p_val_adj <= 0.05)

# Downregulated genes: log2 fold change of at most -1 and adjusted p <= 0.05
GO_down <- input %>%
  dplyr::filter(avg_log2FC <= -1, p_val_adj <= 0.05)

# Write the output for the Cytoscape analysis
write.csv(GO_up, "GO_up.csv", row.names = FALSE)
write.csv(GO_down, "GO_Down.csv", row.names = FALSE)
# Map gene symbols to Entrez IDs for upregulated genes.
# The symbol column is addressed by name everywhere in this step, so reading
# the keys and renaming the merge key always refer to the same column, even if
# "Gene_name" is not the first column of the table.
stopifnot("Gene_name" %in% colnames(GO_up))
my_symbols_up <- as.character(GO_up[["Gene_name"]])
Entrezid_up <- AnnotationDbi::select(
  org.Mm.eg.db,
  keys = my_symbols_up,
  columns = c("ENTREZID", "SYMBOL"),
  keytype = "SYMBOL"
)
## 'select()' returned 1:1 mapping between keys and columns

# Merge with the filtered data, using SYMBOL as the shared key
colnames(GO_up)[colnames(GO_up) == "Gene_name"] <- "SYMBOL"
combined_up <- merge(GO_up, Entrezid_up, by = "SYMBOL")

# Drop only the genes that could not be mapped to an Entrez ID. A blanket
# na.omit() would also discard mapped genes that merely have an NA in some
# unrelated column.
combined_up <- combined_up[!is.na(combined_up[["ENTREZID"]]), , drop = FALSE]

# Prepare the gene list for GSEA: log2 fold changes named by Entrez ID,
# sorted in decreasing order
genelist_up <- combined_up[["avg_log2FC"]]
names(genelist_up) <- as.character(combined_up[["ENTREZID"]])
genelist_up <- sort(genelist_up, decreasing = TRUE)

# Keep one entry per Entrez ID (the largest fold change, since the vector is
# already sorted). This is a no-op when the mapping is 1:1.
genelist_up <- genelist_up[!duplicated(names(genelist_up))]
# Run GO gene-set enrichment on the ranked list of upregulated genes, plot the
# result and export it as CSV.
#
# A previous run with the default scoreType ("std") produced this fgsea warning:
#   "All values in the stats vector are greater than zero and scoreType is
#    "std", maybe you should switch to scoreType = "pos"."
# The list is built from genes filtered to positive fold changes, so the
# one-tailed positive score is requested whenever every statistic is positive.
# scoreType reaches fgsea through gseGO's `...`, the same way nPermSimple does.
#
# NOTE(review): the input is a thresholded DEG list and pAdjustMethod = "none"
# applies no multiple-testing correction; both are kept as in the original
# analysis. Confirm that this is intended.
gse_score_type <- if (all(genelist_up > 0)) "pos" else "std"

gse_up <- gseGO(
  geneList = genelist_up,
  ont = "all",
  keyType = "ENTREZID",
  minGSSize = 3,
  maxGSSize = 500,
  pvalueCutoff = 0.05,
  verbose = TRUE,
  OrgDb = org.Mm.eg.db,
  pAdjustMethod = "none",
  nPermSimple = 1000,
  scoreType = gse_score_type
)

# Dot plot of the enriched GO terms
dotplot(gse_up)

# Export the enrichment result table
write.csv(gse_up, "Gse_up.csv")
# Packages used for the DAVID dot plot
library(ggplot2)
library(dplyr)

# Read the tab-separated DAVID export
input <- read.delim("DAVID.txt", header = TRUE)

# Keep only the readable GO term name: strip everything up to and including
# the last "~" in each Term value
input <- mutate(input, Term = sub(".*~", "", Term))

# Dot plot: one point per GO term, positioned and sized by gene count and
# coloured by fold enrichment (blue = low, red = high)
ggplot(data = input, mapping = aes(x = Count, y = Term)) +
  geom_point(mapping = aes(size = Count, color = Fold.Enrichment)) +
  scale_colour_gradient(low = "blue", high = "red") +
  labs(
    title = "Dot Plot of GO Term Enrichment using DAVID",
    x = "Count",
    y = "GO Term",
    size = "Count",
    color = "Fold Enrichment"
  ) +
  theme_classic() +
  # Applied after theme_classic() so the rotated x-axis labels are not reset
  theme(axis.text.x = element_text(angle = 90, hjust = 1))