#Gene Ontology
library(org.Mm.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
library(clusterProfiler)
##
## clusterProfiler v4.12.0 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
library(enrichplot)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(DOSE)
## DOSE v3.30.1 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
# Read the differential-expression table.
input <- read.csv("Nr2f6_GFP_DEG.csv", header = TRUE)

# Fail early, with a clear message, if the columns used for filtering are
# absent from the file.
required_cols <- c("avg_log2FC", "p_val_adj")
missing_cols <- required_cols[!required_cols %in% colnames(input)]
if (length(missing_cols) > 0) {
  stop(
    "Nr2f6_GFP_DEG.csv is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Thresholds shared by the up- and down-regulated filters.
lfc_cutoff <- 1
padj_cutoff <- 0.05

# Filter for upregulated genes.
# `filter` is masked several times in this session, so call dplyr's explicitly.
GO_up <- input %>%
  dplyr::filter(avg_log2FC >= lfc_cutoff & p_val_adj <= padj_cutoff)

# Filter for downregulated genes.
GO_down <- input %>%
  dplyr::filter(avg_log2FC <= -lfc_cutoff & p_val_adj <= padj_cutoff)

# Write the output for the Cytoscape analysis.
# NOTE(review): the two file names differ in casing ("GO_up" vs "GO_Down");
# they are kept as-is because downstream steps may expect these exact names.
write.csv(GO_up, "GO_up.csv", row.names = FALSE)
write.csv(GO_down, "GO_Down.csv", row.names = FALSE)
# Map gene symbols to Entrez IDs for the upregulated genes.
my_symbols_up <- GO_up$Gene_name
Entrezid_up <- AnnotationDbi::select(
  org.Mm.eg.db,
  keys = my_symbols_up,
  columns = c("ENTREZID", "SYMBOL"),
  keytype = "SYMBOL"
)
## 'select()' returned 1:1 mapping between keys and columns

# Merge the Entrez IDs back onto the filtered data.
# The symbol column is renamed by name rather than by position, so the merge
# key is correct even when Gene_name is not the first column of the table.
colnames(GO_up)[colnames(GO_up) == "Gene_name"] <- "SYMBOL"
combined_up <- merge(GO_up, Entrezid_up, by = "SYMBOL")

# Drop only the rows that cannot be used for ranking (no Entrez ID or no fold
# change). na.omit() would also discard genes that merely have an NA in some
# unrelated column.
usable_rows <- !is.na(combined_up$ENTREZID) & !is.na(combined_up$avg_log2FC)
combined_up <- combined_up[usable_rows, , drop = FALSE]

# Prepare the gene list for GSEA: log2 fold changes named by Entrez ID, sorted
# in decreasing order.
genelist_up <- combined_up$avg_log2FC
names(genelist_up) <- as.character(combined_up$ENTREZID)
genelist_up <- sort(genelist_up, decreasing = TRUE)

# Gene identifiers should be unique in the ranked list. Because the vector is
# already sorted, keeping the first occurrence keeps the largest fold change
# for any Entrez ID that appears more than once.
genelist_up <- genelist_up[!duplicated(names(genelist_up))]
# Run GO gene-set enrichment on the ranked upregulated genes, plot the result
# and save the result table.
#
# genelist_up only holds positive log2 fold changes (the input was filtered
# with avg_log2FC >= 1). With the default two-sided score type, fgsea warned:
# "All values in the stats vector are greater than zero and scoreType is
# "std", maybe you should switch to scoreType = "pos"". The one-sided positive
# score type is therefore requested explicitly; it is forwarded to fgsea
# through gseGO()'s `...`, the same way nPermSimple is.
gse_up <- gseGO(
  geneList = genelist_up,
  ont = "all",
  keyType = "ENTREZID",
  minGSSize = 3,
  maxGSSize = 500,
  pvalueCutoff = 0.05,
  verbose = TRUE,
  OrgDb = org.Mm.eg.db,
  # NOTE(review): no multiple-testing correction is applied, so pvalueCutoff
  # acts on raw p-values. Confirm this is intended before reporting terms as
  # significant; "BH" is the usual choice.
  pAdjustMethod = "none",
  nPermSimple = 1000,
  scoreType = "pos"
)
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
dotplot(gse_up)
write.csv(gse_up,"Gse_up.csv")
# Load necessary libraries
library(ggplot2)
library(dplyr)
# Read the tab-separated DAVID results table.
input <- read.delim("DAVID.txt", header = TRUE, sep = "\t")

# Keep only the text after the last "~" in each Term entry, i.e. the
# human-readable term name.
input$Term <- sub(".*~", "", input$Term)

# Dot plot: one point per term, positioned by Count, sized by Count and
# coloured by Fold.Enrichment.
ggplot(data = input) +
  geom_point(
    mapping = aes(
      x = Count,
      y = Term,
      size = Count,
      colour = Fold.Enrichment
    )
  ) +
  scale_colour_gradient(low = "blue", high = "red") +
  labs(
    title = "Dot Plot of GO Term Enrichment using DAVID",
    x = "Count",
    y = "GO Term",
    size = "Count",
    colour = "Fold Enrichment"
  ) +
  theme_classic() +
  # Rotate the x-axis tick labels; this must come after theme_classic() so the
  # complete theme does not override it.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))