Untitled

# Gene Ontology

library(org.Mm.eg.db)

## Loading required package: AnnotationDbi

## Loading required package: stats4

## Loading required package: BiocGenerics

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
##     tapply, union, unique, unsplit, which.max, which.min

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## Loading required package: IRanges

## Loading required package: S4Vectors

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:utils':
## 
##     findMatches

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

##

library(clusterProfiler)

##

## clusterProfiler v4.12.0  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141

## 
## Attaching package: 'clusterProfiler'

## The following object is masked from 'package:AnnotationDbi':
## 
##     select

## The following object is masked from 'package:IRanges':
## 
##     slice

## The following object is masked from 'package:S4Vectors':
## 
##     rename

## The following object is masked from 'package:stats':
## 
##     filter

library(enrichplot)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:AnnotationDbi':
## 
##     select

## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union

## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union

## The following object is masked from 'package:Biobase':
## 
##     combine

## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(DOSE)

## DOSE v3.30.1  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609

# Read the differential-expression table from the working directory.
input <- read.csv("Nr2f6_GFP_DEG.csv", header = TRUE)

# Upregulated genes: avg_log2FC of at least 1 (>= 2-fold) and an adjusted
# p-value of at most 0.05.
# dplyr::filter is namespaced because stats, clusterProfiler and dplyr all
# define `filter`, so the bare name depends on the order of library() calls.
GO_up <- input %>%
  dplyr::filter(avg_log2FC >= 1, p_val_adj <= 0.05)

# Downregulated genes: avg_log2FC of at most -1 with the same significance
# cutoff.
GO_down <- input %>%
  dplyr::filter(avg_log2FC <= -1, p_val_adj <= 0.05)

# Write the filtered tables for the Cytoscape analysis.
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
write.csv(GO_up, "GO_up.csv", row.names = FALSE)
write.csv(GO_down, "GO_Down.csv", row.names = FALSE)

# Look up the Entrez ID for each upregulated gene symbol in the mouse
# annotation database. `select` is called through AnnotationDbi:: because
# clusterProfiler and dplyr both mask it.
my_symbols_up <- GO_up$Gene_name
Entrezid_up <- AnnotationDbi::select(
  org.Mm.eg.db,
  keys = my_symbols_up,
  columns = c("ENTREZID", "SYMBOL"),
  keytype = "SYMBOL"
)

## 'select()' returned 1:1 mapping between keys and columns

# Attach Entrez IDs to the filtered DEG table.
# The symbol column is renamed by NAME (it is read as `Gene_name` when the
# lookup keys are built) rather than by position, so the merge key stays
# correct even if the CSV column order changes.
names(GO_up)[names(GO_up) == "Gene_name"] <- "SYMBOL"
combined_up <- merge(GO_up, Entrezid_up, by = "SYMBOL")

# Drop only rows that cannot be used for ranking: genes without an Entrez ID
# or without a fold change. A blanket na.omit() would also discard genes that
# merely have an NA in some unrelated column.
usable_up <- !is.na(combined_up$ENTREZID) & !is.na(combined_up$avg_log2FC)
combined_up <- combined_up[usable_up, , drop = FALSE]

# The ranked vector is named by Entrez ID and those names should be unique.
# When one ID occurs more than once, keep the entry with the largest fold
# change (rows are ordered first so duplicated() retains that one).
combined_up <- combined_up[order(combined_up$avg_log2FC, decreasing = TRUE), ,
                           drop = FALSE]
combined_up <- combined_up[!duplicated(combined_up$ENTREZID), , drop = FALSE]

# Prepare the gene list for GSEA: log2 fold changes named by Entrez ID,
# sorted from highest to lowest.
genelist_up <- combined_up$avg_log2FC
names(genelist_up) <- as.character(combined_up$ENTREZID)
genelist_up <- sort(genelist_up, decreasing = TRUE)

# Gene-set enrichment over all GO ontologies for the upregulated gene list.
#
# scoreType: genelist_up is built from genes filtered to avg_log2FC >= 1, so
# every statistic is positive. With the default two-sided "std" score fgsea
# warns and recommends the one-tailed "pos" score; "std" is kept as a
# fallback in case the list ever contains non-positive values.
#
# NOTE(review): pAdjustMethod = "none" means pvalueCutoff is applied to
# uncorrected p-values although many GO terms are tested at once. Confirm
# this is intentional; "BH" is the usual choice.
gse_score_type <- if (all(genelist_up > 0)) "pos" else "std"

gse_up <- gseGO(
  geneList = genelist_up,
  ont = "all",
  keyType = "ENTREZID",
  minGSSize = 3,
  maxGSSize = 500,
  pvalueCutoff = 0.05,
  verbose = TRUE,
  OrgDb = org.Mm.eg.db,
  pAdjustMethod = "none",
  # Extra arguments are forwarded to fgsea.
  nPermSimple = 1000,
  scoreType = gse_score_type
)

## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).

## preparing geneSet collections...

## GSEA analysis...

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".

## leading edge analysis...

## done...

# Draw the dot plot of the enrichment result.
gse_up_dotplot <- dotplot(gse_up)
print(gse_up_dotplot)

# Save the enrichment table next to the script.
write.csv(gse_up, file = "Gse_up.csv")

# Load necessary libraries
library(ggplot2)
library(dplyr)

# Load the tab-separated DAVID result table.
# Note: this overwrites `input`, which previously held the DEG table.
input <- read.csv("DAVID.txt", sep = "\t", header = TRUE)

# Keep only the readable term name by removing the prefix up to the FIRST "~"
# (presumably terms look like "GO:xxxxxxx~name"; confirm against the file).
# The pattern is anchored and non-greedy on purpose: ".*~" would strip up to
# the LAST "~" and truncate any term name that itself contains a tilde.
input <- input %>%
  mutate(Term = sub("^[^~]*~", "", Term))

# Dot plot: one point per term, positioned and sized by gene count and
# coloured by fold enrichment (blue = low, red = high).
ggplot(input, aes(y = Term, x = Count)) +
  geom_point(aes(size = Count, color = Fold.Enrichment)) +
  scale_color_gradient(low = "blue", high = "red") +
  theme_classic() +
  labs(
    title = "Dot Plot of GO Term Enrichment using DAVID",
    y = "GO Term",
    x = "Count",
    color = "Fold Enrichment",
    size = "Count"
  ) +
  # Rotate the x-axis tick labels so they are drawn vertically.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Untitled

Manoj

2024-07-04