##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Inicalmente vamos a trabajar con la base de datos all de la semana 1. El archivo se llama Rn5K_08_15_all.txt.
w1_all <- read.delim("C:/Users/acard/Downloads/R_project/MICROARREGLO_NAT/DATA/Work Semana 1/w1_all.txt")
kable(head(w1_all,20), caption = "BASE DE DATOS SEMANA 1")
| Cy3 | Cy5 | Id | Symbol | Zscore |
|---|---|---|---|---|
| 19.0 | 15.0 | AB008889 | Trpc4 | -2.0495676 |
| 17.0 | 19.0 | AF016182 | 0.9753379 | |
| 18.0 | 18.0 | BG375351 | 0.0074636 | |
| 21.0 | 16.5 | AJ223599 | Kif3c | -2.0911042 |
| 20.3 | 17.4 | AF023657 | Enman | -1.3339400 |
| 19.9 | 18.0 | NM_019315 | Kcnn3 | -0.8657543 |
| 18.0 | 20.4 | U39208 | 1.0966206 | |
| 17.0 | 22.0 | AW140637 | 2.2510665 | |
| 18.0 | 21.0 | NM_031542 | Brca2 | 1.3488673 |
| 21.0 | 19.0 | AB010467 | Abcc3 | -0.8634525 |
| 20.5 | 21.5 | NM_019368 | Bet1l | 0.4219181 |
| 23.0 | 20.3 | NM_019274 | Colq | -1.0791715 |
| 23.0 | 22.0 | NM_031087 | Psen2 | -0.3793511 |
| 24.0 | 23.2 | BF549571 | -0.2875443 | |
| 23.5 | 23.9 | NM_012581 | Hoxa2 | 0.1543346 |
| 22.4 | 25.5 | L22294 | Pdk1 | 1.1353820 |
| 28.0 | 28.0 | M84000 | Pzp | 0.0074636 |
| 27.0 | 30.5 | AW918595 | 1.0681365 | |
| 29.4 | 29.2 | NM_031332 | LOC83500 | -0.0519352 |
| 27.7 | 31.4 | L07073 | Ap3m1 | 1.0984684 |
Esta base de datos tiene 5 columnas
Esta base de datos tiene una dimensión de 5593 registros/genes/clones y 5 variables/columnas
gene.data <- w1_all
dim(gene.data)
## [1] 5593 5
La cantidad de genes anotados con símbolos es de solo 22% aproximadamente
df <- data.frame(anotacion = c("con simbolo", "sin simbolo"),
valor = c(length(which(gene.data$Symbol == "")),
length(which(gene.data$Symbol != "")))
)
df <- df %>%
mutate(prop = round(valor / nrow(gene.data) *100,2))
bp<- ggplot(df, aes(x="", y=prop, fill=anotacion))+
geom_bar(width = 1, stat = "identity")+
coord_polar("y", start=0)
bp
kable(df)
| anotacion | valor | prop |
|---|---|---|
| con simbolo | 1230 | 21.99 |
| sin simbolo | 4363 | 78.01 |
Por esta razón, y porque los paquetes habitualmente usan la codificación ENTREZ GENE ID para hacer los análisis de enriquecimiento vamos a agregar las anotaciones con las bases de datos org.Rn.eg.db
Primero tenemos que importar la base de datos específica para rata
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("org.Rn.eg.db")
Una vez que se instala esta base de datos, se carga al entorno de trabajo y con la función columns() podemos ver que terminos podemos asociar
library(org.Rn.eg.db)
## Loading required package: AnnotationDbi
## Warning: package 'AnnotationDbi' was built under R version 4.0.3
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.3
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.0.3
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.0.3
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.0.3
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:base':
##
## expand.grid
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:grDevices':
##
## windows
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
columns(org.Rn.eg.db)
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS"
## [6] "ENTREZID" "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME"
## [11] "GO" "GOALL" "IPI" "ONTOLOGY" "ONTOLOGYALL"
## [16] "PATH" "PFAM" "PMID" "PROSITE" "REFSEQ"
## [21] "SYMBOL" "UNIGENE" "UNIPROT"
Estas diferentes columnas nos dicen que terminos podemos anotar, como por ejemplo
| Nombre | definición |
|---|---|
| “ACCNUM” | Número de acceso del gen |
| “ENSEMBL” | Código Ensembl |
| “REFSEQ” | Código de refseq de NCBI |
| “ENTREZID” | Código Entrez gene id |
Como esta base de datos tiene tantos valores de “REFSEQ”, como de “ACCNUM” y GenBank (este último no definido en la base de datos utilizada), vamos a ir anotando uno por uno estos datos.
Vamos a crear un objeto temporal temp para ir agregando estos términos
temp <-AnnotationDbi::select(org.Rn.eg.db,
keys=as.character(gene.data$Id),
columns="ENTREZID",
keytype="REFSEQ")
## 'select()' returned 1:1 mapping between keys and columns
temp2 <- temp[is.na(temp$ENTREZID),]
temp <- temp[complete.cases(temp),]
names(temp) <- c("Id", "ENTREZID")
dim(temp)
## [1] 2731 2
kable(head(temp,10))
| Id | ENTREZID | |
|---|---|---|
| 6 | NM_019315 | 54263 |
| 9 | NM_031542 | 360254 |
| 11 | NM_019368 | 54400 |
| 12 | NM_019274 | 29755 |
| 13 | NM_031087 | 81751 |
| 15 | NM_012581 | 103690123 |
| 19 | NM_031332 | 83500 |
| 21 | NM_017267 | 29635 |
| 26 | NM_013105 | 25642 |
| 28 | NM_031801 | 83632 |
Con esto se logra anotar 2731 de los 5593 genes anotados para el microarreglo.
Algunos del resto de genes pueden ser anotados con el término ACCUM. Para ello vamos a utilizar los genes no anotados anteriormente (temp2), y sobre generar el nuevo data.frame con las anotaciones.
names(temp2) <- c("Id", "ENTREZID")
temp2 <-AnnotationDbi::select(org.Rn.eg.db,
keys=as.character(temp2$Id),
columns="ENTREZID",
keytype="ACCNUM")
## 'select()' returned 1:1 mapping between keys and columns
temp2 <- temp2[complete.cases(temp2),]
names(temp2) <- c("Id", "ENTREZID")
dim(temp2)
## [1] 1760 2
kable(head(temp2,10))
| Id | ENTREZID | |
|---|---|---|
| 1 | AB008889 | 84494 |
| 2 | AF016182 | 286914 |
| 4 | AJ223599 | 85248 |
| 6 | U39208 | 266689 |
| 8 | AB010467 | 140668 |
| 10 | L22294 | 116551 |
| 11 | M84000 | 252922 |
| 13 | L07073 | 171126 |
| 15 | AF010466 | 25712 |
| 16 | L40030 | 94203 |
Con esto se logra anotar 1760 genes, que junto con los 2731 anotados anteriormente ya se tiene 4491 de los 5593 genes anotados para el microarreglo.
Ahora agregaremos los resultados a la base de datos gene.data. Primero fusionamos los dos archivos temporales generados anteriormente (temp, temp2), y luego lo fusionamos con la base de datos gene.data con la función merge() sin eliminar los que aún no tinene ENTREZID asociados
temp <- rbind(temp,temp2)
gene.data <- merge(x = gene.data, y=temp, by = "Id", all = T)
kable(head(gene.data,20), caption = "BASE DE DATOS con ENTREZID")
| Id | Cy3 | Cy5 | Symbol | Zscore | ENTREZID |
|---|---|---|---|---|---|
| AA799713 | 28538.8 | 28914.0 | -0.0650482 | NA | |
| AA800019 | 8004.7 | 8665.5 | 1.3238440 | NA | |
| AA800172 | 5058.3 | 4891.9 | -0.2539321 | NA | |
| AA818018 | 31303.2 | 28960.7 | -0.6334669 | NA | |
| AA818786 | 396.0 | 421.0 | 0.4746041 | NA | |
| AA819151 | 3979.6 | 3806.5 | -0.4492376 | NA | |
| AA850135 | 4263.8 | 4096.8 | -0.5941062 | NA | |
| AA859006 | 13511.0 | 10860.9 | -1.9669838 | NA | |
| AA874881 | 2167.2 | 2103.9 | -0.4867235 | 64622 | |
| AA875305 | 275.3 | 232.0 | -1.1845871 | NA | |
| AA892272 | 4244.5 | 4394.0 | 0.4663000 | NA | |
| AA894165 | 377.2 | 374.3 | -0.0112549 | NA | |
| AA899618 | 32367.4 | 30951.9 | -0.4526110 | NA | |
| AA899923 | 4122.3 | 4449.5 | 0.9817978 | NA | |
| AA923857 | 8109.1 | 7909.9 | -0.3251830 | NA | |
| AA924509 | 109.9 | 117.9 | 0.6862849 | 502374 | |
| AA925587 | 8777.5 | 7869.0 | -1.6998069 | NA | |
| AA943126 | 4805.0 | 5105.7 | 0.9408647 | NA | |
| AA943594 | 13860.7 | 14181.4 | 0.2731025 | NA | |
| AA944017 | 4619.0 | 5185.6 | 1.4256890 | NA |
Como vemos aún faltan genes para anotar los ENTREZID, que si los cuantificamos con la siguiente linea de código, corresponde a
nrow(gene.data[is.na(gene.data$ENTREZID),])
## [1] 1102
a 1102 genes que aún no se han anotado. Para tratar de no perder estos genes, vamos a tratar de anotarlos utilizando su SIMBOLO.
Primero hacemos un subgrupo de los genes que aún no han sido anotados
temp <- gene.data[is.na(gene.data$ENTREZID),]
Ahora usamos la columna “SYMB” para hacer la anotación
temp <-AnnotationDbi::select(org.Rn.eg.db,
keys=as.character(temp$Symbol),
columns="ENTREZID",
keytype="SYMBOL")
## 'select()' returned many:1 mapping between keys and columns
temp <- temp[!is.na(temp$ENTREZID),]
kable(head(temp,20))
| SYMBOL | ENTREZID | |
|---|---|---|
| 38 | Smarcd2 | 83833 |
| 43 | Kcnj13 | 94341 |
| 44 | Syngap1 | 192117 |
| 48 | Slc2a8 | 85256 |
| 49 | Cpb2 | 113936 |
| 50 | Sval1 | 680174 |
| 55 | Grin2a | 24409 |
| 56 | Cdh14 | 25198 |
| 57 | Igfals | 79438 |
| 60 | Itgae | 83577 |
| 63 | Gfap | 24387 |
| 72 | Stx12 | 65033 |
| 73 | Cdc20 | 64515 |
| 89 | Ogfr | 83525 |
| 92 | Egfr | 24329 |
| 96 | Tert | 301965 |
| 97 | Kcnj16 | 29719 |
| 98 | Sacm1l | 116482 |
| 100 | Sry | 25221 |
| 103 | Adra2b | 24174 |
dim(temp)
## [1] 81 2
Con esta estrategia logramos anotar 81 genes mas. Ahora agregaremos estos datos a la base de datos gene.data
for (i in 1:nrow(temp)){
gene.data[which(gene.data$Symbol==temp$SYMBOL[i]),6] <- temp$ENTREZID[i]
}
Ahora vamos a revisar cuantos genes han sido anotados
df <- data.frame(anotacion = c("con ENTREZ", "sin ENTREZ"),
valor = c(nrow(gene.data[!is.na(gene.data$ENTREZID),]),
nrow(gene.data[is.na(gene.data$ENTREZID),]))
)
df <- df %>%
mutate(prop = round(valor / nrow(gene.data) *100,2))
bp<- ggplot(df, aes(x="", y=prop, fill=anotacion))+
geom_bar(width = 1, stat = "identity")+
coord_polar("y", start=0)
bp
kable(df)
| anotacion | valor | prop |
|---|---|---|
| con ENTREZ | 4572 | 81.75 |
| sin ENTREZ | 1021 | 18.25 |
Aún tenemos una perdida de casi el 14% de genes sin anotar. Si buscamos estos Ids faltantes con la herramienta en linea DAVID, tampoco logramos conseguir anotaciones.
Ahora, aprovechand que ya tenemos los códigos ENTREZ GENE ID, vamos a ver que genes están anotados de forma duplicada. Primero eliminaremos los datos sin código ENTREZ GENE ID, porque todos ellos serían duplicados, para luego usar la función duplicated() podemos encontrar los duplicados.
gene.data.fil <- gene.data[!is.na(gene.data$ENTREZID),]
duplicados <- gene.data.fil[duplicated(gene.data.fil$ENTREZID),]
dim(duplicados)
## [1] 73 6
dim(gene.data.fil)
## [1] 4572 6
De los 5593 genes iniciales, anotamos 4491 (85%) de los genes, de estos 1833 están duplicados, quedando 2658 genes no duplicados
df <- data.frame(grupo = c("No anotados", "Duplicados", "Unicos"),
valor = c(1102, 1833, 2658)
)
df <- df %>%
mutate(prop = round((valor/5593) *100,2))
bp<- ggplot(df, aes(x="", y=prop, fill=grupo))+
geom_bar(width = 1, stat = "identity")+
coord_polar("y", start=0)
bp
kable(df)
| grupo | valor | prop |
|---|---|---|
| No anotados | 1102 | 19.70 |
| Duplicados | 1833 | 32.77 |
| Unicos | 2658 | 47.52 |
Ahora vamos a analizar algunos de estos duplicados
kable(head(duplicados,10), caption = "DUPLICADOS")
| Id | Cy3 | Cy5 | Symbol | Zscore | ENTREZID | |
|---|---|---|---|---|---|---|
| 145 | AB032828 | 7922.4 | 7892.5 | Epb4.1l3 | -0.0382058 | 116724 |
| 386 | AF071205 | 20689.9 | 23120.5 | LOC60428 | 1.2469631 | 60427 |
| 411 | AF084544 | 2595.4 | 2544.8 | -0.2622435 | 114122 | |
| 692 | AF277892 | 25253.1 | 25523.3 | Gnb2 | 0.1004706 | 81667 |
| 722 | AF302842 | 4124.3 | 3663.8 | Kcnk2 | -1.6562332 | 116489 |
| 756 | AF322217 | 4768.3 | 4868.5 | 0.2638374 | 302822 | |
| 1284 | AY125814 | 31721.0 | 31417.1 | -0.1547005 | 292657 | |
| 2030 | L19109 | 4342.7 | 4394.3 | 0.1951188 | 25022 | |
| 2052 | L24776 | 1290.9 | 1476.4 | Tpm3 | 1.7900972 | 117557 |
| 2083 | L41045 | 1522.7 | 1419.4 | Pde1c | -1.4858760 | 81742 |
Vamos a buscar el gen mas repetido
kable(head(sort(table(duplicados$ENTREZID),decreasing = T)))
| Var1 | Freq |
|---|---|
| 117557 | 2 |
| 114122 | 1 |
| 114247 | 1 |
| 116489 | 1 |
| 116561 | 1 |
| 116591 | 1 |
Este correspondería al código ENTREZID “117557”. Vamos a buscar en la base de datos a que genes se anoto este ENTREZ GENE ID
gene.data[which(gene.data$ENTREZID == "117557"),]
## Id Cy3 Cy5 Symbol Zscore ENTREZID
## 345 AF053359 912.5 931.8 0.3236289 117557
## 2052 L24776 1290.9 1476.4 Tpm3 1.7900972 117557
## 5454 X72859 11534.3 10982.0 -0.4973582 117557
Efectivamente estos tres Ids corresponden al mismo gen de tropomiosina.
Uno hace referencia al gen completo y los otros a fragmentos o isoformas
Primero creamos el objeto Mart para Rattus norvegicus
library(biomaRt)
## Warning: package 'biomaRt' was built under R version 4.0.3
biomaRt::listMarts()
## biomart version
## 1 ENSEMBL_MART_ENSEMBL Ensembl Genes 102
## 2 ENSEMBL_MART_MOUSE Mouse strains 102
## 3 ENSEMBL_MART_SNP Ensembl Variation 102
## 4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 102
ensembl <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL")
head(biomaRt::listDatasets(mart = ensembl))
## dataset description
## 1 acalliptera_gene_ensembl Eastern happy genes (fAstCal1.2)
## 2 acarolinensis_gene_ensembl Anole lizard genes (AnoCar2.0)
## 3 acchrysaetos_gene_ensembl Golden eagle genes (bAquChr1.2)
## 4 acitrinellus_gene_ensembl Midas cichlid genes (Midas_v5)
## 5 amelanoleuca_gene_ensembl Panda genes (ailMel1)
## 6 amexicanus_gene_ensembl Mexican tetra genes (Astyanax_mexicanus-2.0)
## version
## 1 fAstCal1.2
## 2 AnoCar2.0
## 3 bAquChr1.2
## 4 Midas_v5
## 5 ailMel1
## 6 Astyanax_mexicanus-2.0
biomaRt::searchDatasets(mart = ensembl, pattern = "rnorvegicus")
## dataset description version
## 160 rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) Rnor_6.0
ensembl <- biomaRt::useMart("ensembl", dataset = "rnorvegicus_gene_ensembl")
ensembl
## Object of class 'Mart':
## Using the ENSEMBL_MART_ENSEMBL BioMart database
## Using the rnorvegicus_gene_ensembl dataset
Ahora agregamos los términos GO usando la lista de ENTREZ GEN ID que generamos anteiormete.
head(gene.data.fil)
## Id Cy3 Cy5 Symbol Zscore ENTREZID
## 9 AA874881 2167.2 2103.9 -0.4867235 64622
## 16 AA924509 109.9 117.9 0.6862849 502374
## 40 AB000199 4825.7 4055.9 Cca2 -2.8366111 246211
## 41 AB000215 20732.7 21098.9 Smpd3 0.3974930 94338
## 42 AB000216 12556.3 13169.6 Cca3 0.3892495 171440
## 43 AB000280 9452.5 9306.7 LOC246280 -0.2360655 246280
biomaRt::searchFilters(mart = ensembl, pattern = "entrez")
## name
## 17 with_entrezgene_trans_name
## 29 with_entrezgene
## 64 entrezgene_trans_name
## 78 entrezgene_accession
## 79 entrezgene_id
## description
## 17 With EntrezGene transcript name ID(s)
## 29 With NCBI gene (formerly Entrezgene) ID(s)
## 64 EntrezGene transcript name ID(s) [e.g. Foxb2-202]
## 78 NCBI gene (formerly Entrezgene) accession(s) [e.g. A1bg]
## 79 NCBI gene (formerly Entrezgene) ID(s) [e.g. 100034253]
goannot <- biomaRt::getBM(attributes = c("entrezgene_id", "go_id", "name_1006"),
filters = "entrezgene_id",
values = gene.data.fil$ENTREZID,
mart = ensembl)
kable(head(goannot,20))
| entrezgene_id | go_id | name_1006 |
|---|---|---|
| 100134871 | GO:0046872 | metal ion binding |
| 100134871 | GO:0020037 | heme binding |
| 100134871 | GO:0005344 | oxygen carrier activity |
| 100134871 | GO:0019825 | oxygen binding |
| 100134871 | GO:0015671 | oxygen transport |
| 100134871 | GO:0098869 | cellular oxidant detoxification |
| 100134871 | GO:0004601 | peroxidase activity |
| 100134871 | GO:0031720 | haptoglobin binding |
| 100134871 | GO:0043177 | organic acid binding |
| 100134871 | GO:0042744 | hydrogen peroxide catabolic process |
| 100134871 | GO:0005833 | hemoglobin complex |
| 100134871 | GO:0031838 | haptoglobin-hemoglobin complex |
| 100134871 | GO:0031721 | hemoglobin alpha binding |
| 100188944 | GO:0016020 | membrane |
| 100188944 | GO:0016021 | integral component of membrane |
| 100188944 | GO:0032225 | regulation of synaptic transmission, dopaminergic |
| 100188944 | GO:0042053 | regulation of dopamine metabolic process |
| 100188944 | GO:0046929 | negative regulation of neurotransmitter secretion |
| 100188944 | GO:0050884 | neuromuscular process controlling posture |
| 100188944 | GO:0016787 | hydrolase activity |
Para
pathannot<- AnnotationDbi::select(org.Rn.eg.db,
keys = as.character(gene.data.fil$ENTREZID),
columns = "PATH",
keytype = "ENTREZID")
## 'select()' returned many:many mapping between keys and columns
kable(head(pathannot,20))
| ENTREZID | PATH |
|---|---|
| 64622 | NA |
| 502374 | 00230 |
| 502374 | 00240 |
| 502374 | 01100 |
| 502374 | 03020 |
| 502374 | 05016 |
| 246211 | 00120 |
| 246211 | 01100 |
| 94338 | 00600 |
| 94338 | 01100 |
| 171440 | NA |
| 246280 | NA |
| 81826 | NA |
| 84609 | 04360 |
| 29744 | 04360 |
| 85271 | NA |
| 171134 | NA |
| 116718 | 04146 |
| 89789 | NA |
| 113959 | 04080 |
Primero debemos definir los genes de interes (+/- 2Zscore)
target.gene <- subset(x = gene.data.fil, subset = (Zscore <= -2)|(Zscore >=2))
Primero cargamos el paquete GOstats
## Warning: package 'GOstats' was built under R version 4.0.3
## Loading required package: Category
## Warning: package 'Category' was built under R version 4.0.3
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
##
## expand
## Loading required package: graph
## Warning: package 'graph' was built under R version 4.0.3
##
##
## Attaching package: 'GOstats'
## The following object is masked from 'package:AnnotationDbi':
##
## makeGOGraph
Ahora que ya tenemos definido los genes objetivos podemos crear el objeto
paramsGO <- new("GOHyperGParams",
geneIds = target.gene$ENTREZID,
universeGeneIds = gene.data.fil$ENTREZID,
annotation = "org.Rn.eg.db",
ontology = "BP",
pvalueCutoff = 0.01,
conditional = FALSE,
testDirection = "over")
## Warning in makeValidParams(.Object): removing duplicate IDs in universeGeneIds
Over.GO <- hyperGTest(paramsGO)
kable(head(summary(Over.GO),20), caption = "GO TERMS")
| GOBPID | Pvalue | OddsRatio | ExpCount | Count | Size | Term |
|---|---|---|---|---|---|---|
| GO:0033084 | 0.0001451 | 27.721393 | 0.3271028 | 4 | 7 | regulation of immature T cell proliferation in thymus |
| GO:0033080 | 0.0002796 | 20.786070 | 0.3738318 | 4 | 8 | immature T cell proliferation in thymus |
| GO:0033083 | 0.0002796 | 20.786070 | 0.3738318 | 4 | 8 | regulation of immature T cell proliferation |
| GO:0033079 | 0.0004848 | 16.624876 | 0.4205607 | 4 | 9 | immature T cell proliferation |
| GO:0038007 | 0.0009379 | 31.039604 | 0.2336449 | 3 | 5 | netrin-activated signaling pathway |
| GO:0043407 | 0.0009649 | 5.244949 | 1.6355140 | 7 | 35 | negative regulation of MAP kinase activity |
| GO:0009153 | 0.0021734 | Inf | 0.0934579 | 2 | 2 | purine deoxyribonucleotide biosynthetic process |
| GO:0018960 | 0.0021734 | Inf | 0.0934579 | 2 | 2 | 4-nitrophenol metabolic process |
| GO:0072191 | 0.0021734 | Inf | 0.0934579 | 2 | 2 | ureter smooth muscle development |
| GO:0072193 | 0.0021734 | Inf | 0.0934579 | 2 | 2 | ureter smooth muscle cell differentiation |
| GO:0072198 | 0.0021734 | Inf | 0.0934579 | 2 | 2 | mesenchymal cell proliferation involved in ureter development |
| GO:0072199 | 0.0021734 | Inf | 0.0934579 | 2 | 2 | regulation of mesenchymal cell proliferation involved in ureter development |
| GO:0009123 | 0.0023372 | 6.509375 | 0.9813084 | 5 | 21 | nucleoside monophosphate metabolic process |
| GO:0001906 | 0.0028932 | 2.654948 | 5.4672897 | 13 | 117 | cell killing |
| GO:0002320 | 0.0030609 | 15.512376 | 0.3271028 | 3 | 7 | lymphoid progenitor cell differentiation |
| GO:0009151 | 0.0030609 | 15.512376 | 0.3271028 | 3 | 7 | purine deoxyribonucleotide metabolic process |
| GO:0071315 | 0.0030609 | 15.512376 | 0.3271028 | 3 | 7 | cellular response to morphine |
| GO:0033081 | 0.0032013 | 8.302488 | 0.6542056 | 4 | 14 | regulation of T cell differentiation in thymus |
| GO:1901381 | 0.0035812 | 5.783333 | 1.0747664 | 5 | 23 | positive regulation of potassium ion transmembrane transport |
| GO:0003184 | 0.0047298 | 12.406931 | 0.3738318 | 3 | 8 | pulmonary valve morphogenesis |
Finalmente haremos estee análisis para las vías KEGG
paramsKEGG <- new("KEGGHyperGParams",
geneIds = target.gene$ENTREZID,
universeGeneIds = gene.data$ENTREZID,
annotation = "org.Rn.eg.db",
pvalueCutoff = 0.05,
testDirection ="over")
## Warning in makeValidParams(.Object): removing duplicate IDs in universeGeneIds
Over.KEGG <- hyperGTest(paramsKEGG)
kable(head(summary(Over.KEGG),20), caption = "KEEG PATHWAY")
##
## KEGG.db contains mappings based on older data because the original
## resource was removed from the the public domain before the most
## recent update was produced. This package should now be considered
## deprecated and future versions of Bioconductor may not have it
## available. Users who want more current data are encouraged to look
## at the KEGGREST or reactome.db packages
| KEGGID | Pvalue | OddsRatio | ExpCount | Count | Size | Term |
|---|---|---|---|---|---|---|
| 04330 | 0.0058763 | 6.631884 | 0.7603168 | 4 | 19 | Notch signaling pathway |
| 00603 | 0.0213993 | 12.228723 | 0.2401000 | 2 | 6 | Glycosphingolipid biosynthesis - globo series |
| 00051 | 0.0377866 | 4.610887 | 0.7603168 | 3 | 19 | Fructose and mannose metabolism |
Primero cargamos el paquete
# BiocManager::install("topGO")
library(topGO)
## Warning: package 'topGO' was built under R version 4.0.3
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
##
## Attaching package: 'topGO'
## The following object is masked from 'package:GOstats':
##
## testName
## The following objects are masked from 'package:Category':
##
## ontology, ontology<-, testName
## The following object is masked from 'package:IRanges':
##
## members
Generamos la tabla con los términos usando la base de datos org.Rn.eg.db
goannot2<-select(org.Rn.eg.db,
keys = as.character(gene.data.fil$ENTREZID),
columns="GO",
keytype="ENTREZID")
## 'select()' returned many:many mapping between keys and columns
kable(head(goannot2,20), caption = "GO TERMS")
| ENTREZID | GO | EVIDENCE | ONTOLOGY |
|---|---|---|---|
| 64622 | GO:0003677 | IEA | MF |
| 64622 | GO:0003682 | ISO | MF |
| 64622 | GO:0005507 | IDA | MF |
| 64622 | GO:0005615 | IDA | CC |
| 64622 | GO:0005615 | ISO | CC |
| 64622 | GO:0005634 | IDA | CC |
| 64622 | GO:0005737 | IDA | CC |
| 64622 | GO:0007614 | IDA | BP |
| 64622 | GO:0009743 | IEP | BP |
| 64622 | GO:0010035 | IEP | BP |
| 64622 | GO:0010629 | IDA | BP |
| 64622 | GO:0010835 | IDA | BP |
| 64622 | GO:0010976 | IDA | BP |
| 64622 | GO:0019934 | IDA | BP |
| 64622 | GO:0030424 | IDA | CC |
| 64622 | GO:0030425 | IDA | CC |
| 64622 | GO:0031668 | IEP | BP |
| 64622 | GO:0032091 | IDA | BP |
| 64622 | GO:0032147 | IDA | BP |
| 64622 | GO:0033484 | IDA | BP |
Convertimos estos datos en una estructura aceptable para topGO
a <- 1
gene2GO<-c()
genevec <- c()
for (genid in gene.data.fil$ENTREZID){
genevec[a] <- genid
gene2GO[a] <- list(goannot2[goannot2$ENTREZID==genid,"GO"])
a <- a+1
}
names(gene2GO)<-genevec
head(gene2GO)
## $`64622`
## [1] "GO:0003677" "GO:0003682" "GO:0005507" "GO:0005615" "GO:0005615"
## [6] "GO:0005634" "GO:0005737" "GO:0007614" "GO:0009743" "GO:0010035"
## [11] "GO:0010629" "GO:0010835" "GO:0010976" "GO:0019934" "GO:0030424"
## [16] "GO:0030425" "GO:0031668" "GO:0032091" "GO:0032147" "GO:0033484"
## [21] "GO:0042277" "GO:0043025" "GO:0043524" "GO:0043524" "GO:0044849"
## [26] "GO:0045773" "GO:0048487" "GO:0050731" "GO:0050805" "GO:0051965"
## [31] "GO:0003677" "GO:0003682" "GO:0005507" "GO:0005615" "GO:0005615"
## [36] "GO:0005634" "GO:0005737" "GO:0007614" "GO:0009743" "GO:0010035"
## [41] "GO:0010629" "GO:0010835" "GO:0010976" "GO:0019934" "GO:0030424"
## [46] "GO:0030425" "GO:0031668" "GO:0032091" "GO:0032147" "GO:0033484"
## [51] "GO:0042277" "GO:0043025" "GO:0043524" "GO:0043524" "GO:0044849"
## [56] "GO:0045773" "GO:0048487" "GO:0050731" "GO:0050805" "GO:0051965"
##
## $`502374`
## [1] "GO:0005634" "GO:0005665" "GO:0006366"
##
## $`246211`
## [1] "GO:0001558" "GO:0003854" "GO:0005789" "GO:0005811" "GO:0005811"
## [6] "GO:0006694" "GO:0006707" "GO:0008206" "GO:0016021" "GO:0016491"
## [11] "GO:0016616" "GO:0016853" "GO:0035754" "GO:0035754" "GO:0035754"
## [16] "GO:0043231" "GO:0047016" "GO:0047016" "GO:0047016" "GO:0055114"
## [21] "GO:0097421"
##
## $`94338`
## [1] "GO:0000137" "GO:0000137" "GO:0000139" "GO:0001501" "GO:0001503"
## [6] "GO:0001932" "GO:0001932" "GO:0001958" "GO:0001958" "GO:0002063"
## [11] "GO:0002244" "GO:0002244" "GO:0002685" "GO:0002685" "GO:0003433"
## [16] "GO:0003433" "GO:0004620" "GO:0004767" "GO:0004767" "GO:0005515"
## [21] "GO:0005576" "GO:0005737" "GO:0005737" "GO:0005794" "GO:0005886"
## [26] "GO:0005886" "GO:0006665" "GO:0006672" "GO:0006672" "GO:0006684"
## [31] "GO:0006684" "GO:0006685" "GO:0006685" "GO:0007165" "GO:0007568"
## [36] "GO:0014824" "GO:0015774" "GO:0015774" "GO:0016020" "GO:0016021"
## [41] "GO:0030072" "GO:0030072" "GO:0030282" "GO:0030282" "GO:0030324"
## [46] "GO:0030509" "GO:0030509" "GO:0032963" "GO:0032963" "GO:0034614"
## [51] "GO:0035264" "GO:0035264" "GO:0042802" "GO:0042802" "GO:0043491"
## [56] "GO:0043491" "GO:0045429" "GO:0045840" "GO:0045840" "GO:0046872"
## [61] "GO:0048008" "GO:0048008" "GO:0048286" "GO:0048286" "GO:0048661"
## [66] "GO:0048661" "GO:0051216" "GO:0051481" "GO:0060348" "GO:0060541"
## [71] "GO:0061035" "GO:0061035" "GO:0061751" "GO:0061751" "GO:0070301"
## [76] "GO:0070301" "GO:0070314" "GO:0070314" "GO:0071286" "GO:0071286"
## [81] "GO:0071347" "GO:0071356" "GO:0071356" "GO:0071461" "GO:0071461"
## [86] "GO:0071897" "GO:0071897" "GO:0085029" "GO:0085029" "GO:0090494"
## [91] "GO:0090520" "GO:0090520" "GO:0097187" "GO:0097187" "GO:0098868"
## [96] "GO:0098868" "GO:0140014" "GO:0140014" "GO:0140052" "GO:0140052"
## [101] "GO:1900125" "GO:1900126" "GO:1900126" "GO:1901224" "GO:1901653"
## [106] "GO:1901653" "GO:1903543" "GO:2000304"
##
## $`171440`
## [1] "GO:0005654" "GO:0005654" "GO:0046982" "GO:0097237" "GO:0097237"
##
## $`246280`
## [1] "GO:0005290" "GO:0005290" "GO:0006857" "GO:0015031" "GO:0015293"
## [6] "GO:0015817" "GO:0015817" "GO:0016021" "GO:0022857" "GO:0055085"
## [11] "GO:0089709"
Ahora generamos la lista donde se define que genes son de interes y cuales no.
geneList<-rep(0,length(gene.data.fil$ENTREZID))
geneList[gene.data.fil$ENTREZID %in% target.gene$ENTREZID]<-1
names(geneList)<-genevec
geneList<-factor(geneList)
head(geneList)
## 64622 502374 246211 94338 171440 246280
## 0 0 1 0 0 0
## Levels: 0 1
Con estas estructuras podemos construir el objto topGO
GOdata.MF <- new("topGOdata",
ontology = "MF",
description = "MF on up/down regulated genes",
allGenes = geneList,
annot = annFUN.gene2GO,
gene2GO = gene2GO)
##
## Building most specific GOs .....
## ( 2984 GO terms found. )
##
## Build GO DAG topology ..........
## ( 3433 GO terms and 4418 relations. )
##
## Annotating nodes ...............
## ( 4300 genes annotated to the GO terms. )
Ahora hacemos algunos análisis estadísticos
resultFisher.MF.classic <- runTest(GOdata.MF,
algorithm = "classic",
statistic = "fisher")
##
## -- Classic Algorithm --
##
## the algorithm is scoring 868 nontrivial nodes
## parameters:
## test statistic: fisher
allRes.MF.classic <- GenTable(GOdata.MF,
classicFisher = resultFisher.MF.classic,
topNodes = 20)
resultFisher.MF.weight <- runTest(GOdata.MF,
algorithm = "weight",
statistic = "fisher")
##
## -- Weight Algorithm --
##
## The algorithm is scoring 868 nontrivial nodes
## parameters:
## test statistic: fisher : ratio
##
## Level 12: 9 nodes to be scored.
##
## Level 11: 15 nodes to be scored.
##
## Level 10: 20 nodes to be scored.
##
## Level 9: 58 nodes to be scored.
##
## Level 8: 77 nodes to be scored.
##
## Level 7: 102 nodes to be scored.
##
## Level 6: 194 nodes to be scored.
##
## Level 5: 199 nodes to be scored.
##
## Level 4: 140 nodes to be scored.
##
## Level 3: 44 nodes to be scored.
##
## Level 2: 9 nodes to be scored.
##
## Level 1: 1 nodes to be scored.
allRes.MF.weight <- GenTable(GOdata.MF,
classicFisher = resultFisher.MF.weight,
topNodes = 20)
resultFisher.MF.parentchild <- runTest(GOdata.MF,
algorithm = "parentchild",
statistic = "fisher")
##
## -- Parent-Child Algorithm --
##
## the algorithm is scoring 868 nontrivial nodes
## parameters:
## test statistic: fisher : joinFun = union
##
## Level 12: 9 nodes to be scored.
##
## Level 11: 15 nodes to be scored.
##
## Level 10: 20 nodes to be scored.
##
## Level 9: 58 nodes to be scored.
##
## Level 8: 77 nodes to be scored.
##
## Level 7: 102 nodes to be scored.
##
## Level 6: 194 nodes to be scored.
##
## Level 5: 199 nodes to be scored.
##
## Level 4: 140 nodes to be scored.
##
## Level 3: 44 nodes to be scored.
##
## Level 2: 9 nodes to be scored.
allRes.MF.parentchild <- GenTable(GOdata.MF,
classicFisher = resultFisher.MF.parentchild,
topNodes = 20)
kable(head(allRes.MF.classic,20), caption = "FISHER TEST")
| GO.ID | Term | Annotated | Significant | Expected | classicFisher |
|---|---|---|---|---|---|
| GO:0019206 | nucleoside kinase activity | 2 | 2 | 0.09 | 0.0021 |
| GO:0047134 | protein-disulfide reductase activity | 2 | 2 | 0.09 | 0.0021 |
| GO:0016830 | carbon-carbon lyase activity | 23 | 5 | 1.06 | 0.0033 |
| GO:0004332 | fructose-bisphosphate aldolase activity | 3 | 2 | 0.14 | 0.0061 |
| GO:0005042 | netrin receptor activity | 3 | 2 | 0.14 | 0.0061 |
| GO:0043237 | laminin-1 binding | 3 | 2 | 0.14 | 0.0061 |
| GO:0030159 | signaling receptor complex adaptor activ… | 20 | 4 | 0.92 | 0.0118 |
| GO:0016832 | aldehyde-lyase activity | 4 | 2 | 0.18 | 0.0119 |
| GO:0005547 | phosphatidylinositol-3,4,5-trisphosphate… | 12 | 3 | 0.55 | 0.0155 |
| GO:0016668 | oxidoreductase activity, acting on a sul… | 5 | 2 | 0.23 | 0.0192 |
| GO:0043325 | phosphatidylinositol-3,4-bisphosphate bi… | 5 | 2 | 0.23 | 0.0192 |
| GO:0050693 | LBD domain binding | 5 | 2 | 0.23 | 0.0192 |
| GO:0019205 | nucleobase-containing compound kinase ac… | 14 | 3 | 0.64 | 0.0239 |
| GO:0005068 | transmembrane receptor protein tyrosine … | 6 | 2 | 0.28 | 0.0279 |
| GO:0008236 | serine-type peptidase activity | 82 | 8 | 3.77 | 0.0335 |
| GO:0016829 | lyase activity | 82 | 8 | 3.77 | 0.0335 |
| GO:0035259 | glucocorticoid receptor binding | 16 | 3 | 0.74 | 0.0344 |
| GO:0035591 | signaling adaptor activity | 28 | 4 | 1.29 | 0.0374 |
| GO:0005092 | GDP-dissociation inhibitor activity | 7 | 2 | 0.32 | 0.0379 |
| GO:0016831 | carboxy-lyase activity | 17 | 3 | 0.78 | 0.0404 |
kable(head(allRes.MF.weight,20), caption = "FISHER WEIGHTED")
| GO.ID | Term | Annotated | Significant | Expected | classicFisher |
|---|---|---|---|---|---|
| GO:0047134 | protein-disulfide reductase activity | 2 | 2 | 0.09 | 0.0021 |
| GO:0004332 | fructose-bisphosphate aldolase activity | 3 | 2 | 0.14 | 0.0061 |
| GO:0043237 | laminin-1 binding | 3 | 2 | 0.14 | 0.0061 |
| GO:0005042 | netrin receptor activity | 3 | 2 | 0.14 | 0.0061 |
| GO:0030159 | signaling receptor complex adaptor activ… | 20 | 4 | 0.92 | 0.0118 |
| GO:0005547 | phosphatidylinositol-3,4,5-trisphosphate… | 12 | 3 | 0.55 | 0.0155 |
| GO:0043325 | phosphatidylinositol-3,4-bisphosphate bi… | 5 | 2 | 0.23 | 0.0192 |
| GO:0050693 | LBD domain binding | 5 | 2 | 0.23 | 0.0192 |
| GO:0008236 | serine-type peptidase activity | 82 | 8 | 3.77 | 0.0335 |
| GO:0035259 | glucocorticoid receptor binding | 16 | 3 | 0.74 | 0.0344 |
| GO:0005092 | GDP-dissociation inhibitor activity | 7 | 2 | 0.32 | 0.0379 |
| GO:0016831 | carboxy-lyase activity | 17 | 3 | 0.78 | 0.0404 |
| GO:0019206 | nucleoside kinase activity | 2 | 2 | 0.09 | 0.0458 |
| GO:0047381 | dodecanoyl-[acyl-carrier-protein] hydrol… | 1 | 1 | 0.05 | 0.0460 |
| GO:0001587 | Gq/11-coupled serotonin receptor activit… | 1 | 1 | 0.05 | 0.0460 |
| GO:0004878 | complement component C5a receptor activi… | 1 | 1 | 0.05 | 0.0460 |
| GO:0004136 | deoxyadenosine kinase activity | 1 | 1 | 0.05 | 0.0460 |
| GO:0004137 | deoxycytidine kinase activity | 1 | 1 | 0.05 | 0.0460 |
| GO:0004138 | deoxyguanosine kinase activity | 1 | 1 | 0.05 | 0.0460 |
| GO:0042164 | interleukin-12 alpha subunit binding | 1 | 1 | 0.05 | 0.0460 |
kable(head(allRes.MF.parentchild,20), caption = "FISHER PARENTCHILD")
| GO.ID | Term | Annotated | Significant | Expected | classicFisher |
|---|---|---|---|---|---|
| GO:0005042 | netrin receptor activity | 3 | 2 | 0.14 | 0.0017 |
| GO:0047134 | protein-disulfide reductase activity | 2 | 2 | 0.09 | 0.0095 |
| GO:0005547 | phosphatidylinositol-3,4,5-trisphosphate… | 12 | 3 | 0.55 | 0.0112 |
| GO:0043325 | phosphatidylinositol-3,4-bisphosphate bi… | 5 | 2 | 0.23 | 0.0150 |
| GO:0050693 | LBD domain binding | 5 | 2 | 0.23 | 0.0160 |
| GO:0019205 | nucleobase-containing compound kinase ac… | 14 | 3 | 0.64 | 0.0160 |
| GO:0035472 | choriogonadotropin hormone receptor acti… | 1 | 1 | 0.05 | 0.0272 |
| GO:0004964 | luteinizing hormone receptor activity | 1 | 1 | 0.05 | 0.0272 |
| GO:0038106 | choriogonadotropin hormone binding | 1 | 1 | 0.05 | 0.0274 |
| GO:0016668 | oxidoreductase activity, acting on a sul… | 5 | 2 | 0.23 | 0.0289 |
| GO:1905538 | polysome binding | 1 | 1 | 0.05 | 0.0303 |
| GO:0016500 | protein-hormone receptor activity | 11 | 2 | 0.51 | 0.0303 |
| GO:0019206 | nucleoside kinase activity | 2 | 2 | 0.09 | 0.0330 |
| GO:0004471 | malate dehydrogenase (decarboxylating) (… | 1 | 1 | 0.05 | 0.0333 |
| GO:0004473 | malate dehydrogenase (decarboxylating) (… | 1 | 1 | 0.05 | 0.0333 |
| GO:0047016 | cholest-5-ene-3-beta,7-alpha-diol 3-beta… | 1 | 1 | 0.05 | 0.0333 |
| GO:0016830 | carbon-carbon lyase activity | 23 | 5 | 1.06 | 0.0360 |
| GO:0005251 | delayed rectifier potassium channel acti… | 21 | 3 | 0.97 | 0.0370 |
| GO:0016774 | phosphotransferase activity, carboxyl gr… | 1 | 1 | 0.05 | 0.0383 |
| GO:0004001 | adenosine kinase activity | 1 | 1 | 0.05 | 0.0395 |
Tambien podemos generar un analisis de los Procesos Biológicos,usando otro algoritmo para el análisis de Fisher
GOdata.BP <- new("topGOdata",
ontology = "BP",
description = "BP on up/down regulated genes",
allGenes = geneList,
annot = annFUN.gene2GO,
gene2GO = gene2GO)
##
## Building most specific GOs .....
## ( 9766 GO terms found. )
##
## Build GO DAG topology ..........
## ( 13682 GO terms and 32061 relations. )
##
## Annotating nodes ...............
## ( 4387 genes annotated to the GO terms. )
resultFisher <- runTest(GOdata.BP,
algorithm = "elim",
statistic = "fisher")
##
## -- Elim Algorithm --
##
## the algorithm is scoring 4893 nontrivial nodes
## parameters:
## test statistic: fisher
## cutOff: 0.01
##
## Level 19: 1 nodes to be scored (0 eliminated genes)
##
## Level 18: 3 nodes to be scored (0 eliminated genes)
##
## Level 17: 7 nodes to be scored (0 eliminated genes)
##
## Level 16: 13 nodes to be scored (0 eliminated genes)
##
## Level 15: 37 nodes to be scored (0 eliminated genes)
##
## Level 14: 80 nodes to be scored (3 eliminated genes)
##
## Level 13: 153 nodes to be scored (3 eliminated genes)
##
## Level 12: 251 nodes to be scored (43 eliminated genes)
##
## Level 11: 416 nodes to be scored (45 eliminated genes)
##
## Level 10: 556 nodes to be scored (82 eliminated genes)
##
## Level 9: 677 nodes to be scored (89 eliminated genes)
##
## Level 8: 693 nodes to be scored (111 eliminated genes)
##
## Level 7: 704 nodes to be scored (111 eliminated genes)
##
## Level 6: 601 nodes to be scored (142 eliminated genes)
##
## Level 5: 385 nodes to be scored (151 eliminated genes)
##
## Level 4: 199 nodes to be scored (211 eliminated genes)
##
## Level 3: 96 nodes to be scored (256 eliminated genes)
##
## Level 2: 20 nodes to be scored (256 eliminated genes)
##
## Level 1: 1 nodes to be scored (256 eliminated genes)
resultKS <- runTest(GOdata.BP,
algorithm = "elim",
statistic = "KS")
##
## -- Elim Algorithm --
##
## the algorithm is scoring 13682 nontrivial nodes
## parameters:
## test statistic: ks
## cutOff: 0.01
## score order: increasing
##
## Level 19: 4 nodes to be scored (0 eliminated genes)
##
## Level 18: 18 nodes to be scored (0 eliminated genes)
##
## Level 17: 40 nodes to be scored (0 eliminated genes)
##
## Level 16: 84 nodes to be scored (0 eliminated genes)
##
## Level 15: 178 nodes to be scored (12 eliminated genes)
##
## Level 14: 346 nodes to be scored (12 eliminated genes)
##
## Level 13: 636 nodes to be scored (60 eliminated genes)
##
## Level 12: 1089 nodes to be scored (188 eliminated genes)
##
## Level 11: 1515 nodes to be scored (192 eliminated genes)
##
## Level 10: 1859 nodes to be scored (230 eliminated genes)
##
## Level 9: 1982 nodes to be scored (365 eliminated genes)
##
## Level 8: 1892 nodes to be scored (634 eliminated genes)
##
## Level 7: 1653 nodes to be scored (1298 eliminated genes)
##
## Level 6: 1224 nodes to be scored (1658 eliminated genes)
##
## Level 5: 691 nodes to be scored (2144 eliminated genes)
##
## Level 4: 326 nodes to be scored (3153 eliminated genes)
##
## Level 3: 121 nodes to be scored (3248 eliminated genes)
##
## Level 2: 23 nodes to be scored (3435 eliminated genes)
##
## Level 1: 1 nodes to be scored (4295 eliminated genes)
allRes.BP <- GenTable(GOdata.BP,
classic = resultFisher,
elimKS = resultKS,
orderBy = "elimKS",
topNodes=20)
allRes.BP
## GO.ID Term Annotated Significant
## 1 GO:0008150 biological_process 4387 205
## 2 GO:0009987 cellular process 4216 194
## 3 GO:0045471 response to ethanol 192 6
## 4 GO:0048546 digestive tract morphogenesis 22 2
## 5 GO:0051384 response to glucocorticoid 227 13
## 6 GO:0015711 organic anion transport 261 16
## 7 GO:1904646 cellular response to amyloid-beta 32 0
## 8 GO:1901700 response to oxygen-containing compound 1259 50
## 9 GO:0046854 phosphatidylinositol phosphorylation 22 0
## 10 GO:0007608 sensory perception of smell 33 0
## 11 GO:0042755 eating behavior 37 0
## 12 GO:0045039 protein insertion into mitochondrial inn... 4 0
## 13 GO:0042493 response to drug 450 20
## 14 GO:0006814 sodium ion transport 147 9
## 15 GO:0007568 aging 316 9
## 16 GO:0001975 response to amphetamine 40 3
## 17 GO:0051984 positive regulation of chromosome segreg... 9 0
## 18 GO:0032094 response to food 41 0
## 19 GO:0062197 cellular response to chemical stress 216 9
## 20 GO:1904045 cellular response to aldosterone 6 0
## Expected Rank in elimKS classic elimKS
## 1 202.63 1 0.06 < 1e-30
## 2 194.73 2 0.79 7.8e-05
## 3 8.87 3 0.88 0.00075
## 4 1.02 4 0.27 0.00097
## 5 10.48 5 0.24 0.00100
## 6 12.06 6 0.14 0.00110
## 7 1.48 7 1.00 0.00114
## 8 58.15 8 0.95 0.00145
## 9 1.02 9 1.00 0.00158
## 10 1.52 10 1.00 0.00165
## 11 1.71 11 1.00 0.00173
## 12 0.18 12 1.00 0.00177
## 13 20.78 13 0.96 0.00203
## 14 6.79 14 0.23 0.00211
## 15 14.60 15 0.96 0.00232
## 16 1.85 16 0.28 0.00253
## 17 0.42 17 1.00 0.00253
## 18 1.89 18 1.00 0.00253
## 19 9.98 19 0.67 0.00259
## 20 0.28 20 1.00 0.00272
# BiocManager::install("Rgraphviz")
library(Rgraphviz)
## Warning: package 'Rgraphviz' was built under R version 4.0.3
## Loading required package: grid
##
## Attaching package: 'grid'
## The following object is masked from 'package:topGO':
##
## depth
##
## Attaching package: 'Rgraphviz'
## The following objects are masked from 'package:IRanges':
##
## from, to
## The following objects are masked from 'package:S4Vectors':
##
## from, to
showSigOfNodes(GOdata.BP,
score(resultKS),
firstSigNodes = 5,
useInfo = 'all')
## $dag
## A graphNEL graph with directed edges
## Number of Nodes = 26
## Number of Edges = 36
##
## $complete.dag
## [1] "A graph with 26 nodes."