En programación funcional podemos aplicar funciones predefinidas a listas, lo que permite hacer más rápido un flujo de trabajo.
library(purrr)
# Create three lists with three entries each
x <- list(3, 5, 7)
y <- list(10, 20, 30)
z <- list(13, 25, 37)
# Use map2(), which walks two arguments in parallel (element by element)
map2(x, y, ~ .x + .y) # Adds the elements of x and y
## [[1]]
## [1] 13
##
## [[2]]
## [1] 25
##
## [[3]]
## [1] 37
## [[1]]
## [1] 13
##
## [[2]]
## [1] 25
##
## [[3]]
## [1] 37
## [[1]]
## [1] 13
##
## [[2]]
## [1] 25
##
## [[3]]
## [1] 37
# Two-argument function: base-2 logarithm of the ratio x / y
log2ratio <- function(x, y) {
  log2(x / y)
}
# And use map2() to apply it pairwise over x and y
map2(x, y, log2ratio)
## [[1]]
## [1] -1.736966
##
## [[2]]
## [1] -2
##
## [[3]]
## [1] -2.099536
# Redefine log2ratio so that its result comes back as a data.frame
log2ratio <- function(x, y) {
  # Keep the local name `r`: as.data.frame() derives the column name from it
  r <- log2(x / y)
  as.data.frame(r)
}
# Guardamos los tres resultados a un dataframe
map2_dfr(x, y, log2ratio) # Concatenado por filas (rows)
Puedo modificar la función internamente, para que convierta la salida. Una opción poco destructiva es usar una nueva función vía purrr::compose().
# Build a new function that post-processes the output: compose() applies
# log2ratio first and then passes its result to as.data.frame
df_log2ratio <- compose(as.data.frame, log2ratio)
map2_dfr(x, y, df_log2ratio) # Concatenado por filas (rows)
Ahora, podemos trabajar con más de dos listas. Primero, necesitamos definir una lista de listas.
# Build a new list of lists out of x, y and z
l <- list(x, y, z)
# pmap() works with any number of arguments: on each call sum() receives
# the i-th element of every inner list
pmap(l, sum)
## [[1]]
## [1] 26
##
## [[2]]
## [1] 50
##
## [[3]]
## [1] 74
Para evitar hacer loops, podemos utilizar purrr::rerun() para repetir n veces una función.
## [1] -0.50376513 0.71980601 -1.08204452 0.04587921 -1.51017274
## [[1]]
## [1] 1.3928312 0.3480211 1.3931562 -0.9515196 -0.8684568
##
## [[2]]
## [1] 1.0503883 0.9947814 -0.3484109 -0.4447195 -0.9029446
##
## [[3]]
## [1] 0.4771336 0.2878785 3.0856858 -1.4652644 -0.7364345
## [[1]]
## [1] -0.2470879 -0.9831149 -0.7985096 -0.8070917 -2.4671569
##
## [[2]]
## [1] 0.2818356 3.2059926 -1.1151015 -0.0632868 0.1368702
##
## [[3]]
## [1] -0.4211510 0.1064922 -0.7991513 1.1580691 -0.2492503
# Other lists, this time with different lengths
x <- list(10, 14)
y <- list(10, 20, 30, 14)
z <- list(100, 14, 30, 9)
l <- list(x, y, z)
# To take our list of lists apart into a flatter object
l %>% unlist # Flattens our list of lists into a vector
## [1] 10 14 10 20 30 14 100 14 30 9
Ahora, si queremos aplicar teoría de conjuntos a nuestras listas, para unir, interseccionar, etc. las listas, podemos usar reduce()
## [[1]]
## [1] 14
## [[1]]
## [1] 10
##
## [[2]]
## [1] 14
##
## [[3]]
## [1] 20
##
## [[4]]
## [1] 30
##
## [[5]]
## [1] 100
##
## [[6]]
## [1] 9
library(forcats)
library(magrittr)
# Create our example object
# (TRUE is spelled out: unlike T, it is a reserved word and cannot be reassigned)
my.letters <- sample(letters[2:5], 10, replace = TRUE) # draws 10 letters, each one of b, c, d or e
my.letters %<>% factor # converts the letters into a factor, assigning the result back
# Inspect the factor
my.letters %>% levels # Which are the categories?
## [1] "b" "c" "d" "e"
## [1] b c c e b c c e d b
## Levels: c b e d
Los valores categóricos pueden ser útiles en la segmentación de data para análisis, p. ej., qué ruta metabólica está más frecuentemente alterada.
library(dplyr)
data <- read.csv("https://raw.githubusercontent.com/guru99-edu/R-Programming/master/lahman-batting.csv") %>%
select(c(playerID, yearID, AB, teamID, lgID, G, R, HR, SH)) %>%
arrange(yearID %>% desc)
En general, los archivos .CSV son demasiado simples como para guardar metadata sobre el tipo de data que es cada columna. Esto puede llevar a errores, como data de fechas interpretada como caracteres, datos binarios 0/1 interpretados como números, etc.
## Rows: 104,324
## Columns: 9
## $ playerID <chr> "abadfe01", "abreujo02", "adamecr01", "adamsau02", "adamsl...
## $ yearID <int> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017...
## $ AB <int> 0, 621, 13, 0, 109, 48, 291, 83, 29, 162, 279, 167, 1, 0, ...
## $ teamID <chr> "BOS", "CHA", "COL", "WAS", "ATL", "SLN", "ATL", "DET", "C...
## $ lgID <chr> "AL", "AL", "NL", "NL", "NL", "NL", "NL", "AL", "NL", "AL"...
## $ G <int> 48, 156, 12, 6, 85, 31, 100, 29, 30, 70, 133, 53, 9, 63, 5...
## $ R <int> 0, 95, 1, 0, 19, 4, 42, 14, 0, 30, 40, 24, 0, 0, 34, 0, 0,...
## $ HR <int> 0, 33, 0, 0, 5, 1, 19, 1, 0, 2, 16, 6, 0, 0, 6, 0, 0, 1, 0...
## $ SH <int> 0, 0, 0, 0, 1, 0, 0, 0, 3, 1, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0...
playerID y yearID son categóricos, no character ni integers.
library(tidyverse)
# Mean of AB and of HR for every yearID
data %>%
  group_by(yearID) %>% # grouping is applied on a categorical variable
  summarise(mean_AB = mean(AB), mean_HR = mean(HR))

# Number of distinct teamID and playerID values for every yearID
data %>%
  group_by(yearID) %>%
  summarise(
    count_teamIDs = n_distinct(teamID),
    count_playerIDs = n_distinct(playerID)
  )
# This should end up as a new data.frame, since the output does not have the
# same size as the input
# TODO: make a plot of this output
# TODO: data.frame -> list and list -> data.frame interconversions
# TODO: example of using map + reduce
# Negation of is.double(): TRUE for anything that is not a double vector
not.double <- compose(`!`, is.double)
# Which variables of the table hold categorical data?
# NOTE(review): is.double() is also FALSE for integer columns, so the integer
# columns (yearID, AB, G, R, HR, SH) are selected too -- the printed output
# lists all nine columns. Confirm whether that is the intent.
data %>%
  select_if(not.double) %>%
  names() # Names of the columns that are not doubles
## [1] "playerID" "yearID" "AB" "teamID" "lgID" "G" "R"
## [8] "HR" "SH"
# Turn the categorical columns into factors, overwriting each column in place
data[['playerID']] <- as.factor(data[['playerID']])
data[['yearID']] <- as.factor(data[['yearID']])
data[['teamID']] <- as.factor(data[['teamID']])
# data[['lgID']] <- as.factor(data[['lgID']]) # TODO: what is this?
# TODO: turn into a list and apply counting and so on
# Builds a frequency summary table for each factor column
# WARNING: if you run this step by step, RAM usage blows up
data %>% select_if(is.factor) %>% as.list %>% map(fct_count)
## $playerID
## # A tibble: 19,182 x 2
## f n
## <fct> <int>
## 1 aardsda01 9
## 2 aaronha01 23
## 3 aaronto01 7
## 4 aasedo01 13
## 5 abadan01 3
## 6 abadfe01 9
## 7 abadijo01 2
## 8 abbated01 10
## 9 abbeybe01 6
## 10 abbeych01 5
## # ... with 19,172 more rows
##
## $yearID
## # A tibble: 147 x 2
## f n
## <fct> <int>
## 1 1871 115
## 2 1872 157
## 3 1873 125
## 4 1874 123
## 5 1875 218
## 6 1876 124
## 7 1877 98
## 8 1878 80
## 9 1879 127
## 10 1880 135
## # ... with 137 more rows
##
## $teamID
## # A tibble: 149 x 2
## f n
## <fct> <int>
## 1 ALT 18
## 2 ANA 337
## 3 ARI 925
## 4 ATL 2159
## 5 BAL 2615
## 6 BFN 122
## 7 BFP 26
## 8 BL1 48
## 9 BL2 197
## 10 BL3 36
## # ... with 139 more rows
# Nest the table by team: one row per teamID, remaining columns in `data`
nested.data.by.teamID <- data %>%
  group_by(teamID) %>%
  nest()
# Spread the nested `data` column back out into one column per variable
data2 <- nested.data.by.teamID %>%
  tidyr::unnest_wider(data)
# Pull the yearID entry for the 'BOS' team (printed as a one-element list)
data2 %>%
  filter(teamID == 'BOS') %>%
  .[['yearID']]
## [[1]]
## [1] 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017
## [15] 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017
## [29] 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017
## [43] 2017 2017 2017 2017 2017 2017 2017 2016 2016 2016 2016 2016 2016 2016
## [57] 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016
## [71] 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016
## [85] 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016
## [99] 2016 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015
## [113] 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015
## [127] 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015
## [141] 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 2014 2014 2014 2014
## [155] 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014
## [169] 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014
## [183] 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014
## [197] 2014 2014 2014 2014 2014 2014 2014 2014 2014 2013 2013 2013 2013 2013
## [211] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013
## [225] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013
## [239] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013
## [253] 2013 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012
## [267] 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012
## [281] 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012
## [295] 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012
## [309] 2012 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011
## [323] 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011
## [337] 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011
## [351] 2011 2011 2011 2011 2011 2011 2011 2011 2010 2010 2010 2010 2010 2010
## [365] 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010
## [379] 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010
## [393] 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010
## [407] 2010 2010 2010 2010 2010 2009 2009 2009 2009 2009 2009 2009 2009 2009
## [421] 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009
## [435] 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009
## [449] 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009
## [463] 2009 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008
## [477] 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008
## [491] 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008
## [505] 2008 2008 2008 2008 2008 2008 2007 2007 2007 2007 2007 2007 2007 2007
## [519] 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007
## [533] 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007
## [547] 2007 2007 2007 2007 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006
## [561] 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006
## [575] 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006
## [589] 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006
## [603] 2006 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005
## [617] 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005
## [631] 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005
## [645] 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 2004 2004 2004
## [659] 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004
## [673] 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004
## [687] 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004
## [701] 2004 2004 2004 2004 2004 2003 2003 2003 2003 2003 2003 2003 2003 2003
## [715] 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003
## [729] 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003
## [743] 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 2002 2002 2002 2002
## [757] 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002
## [771] 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002
## [785] 2002 2002 2002 2002 2002 2002 2002 2001 2001 2001 2001 2001 2001 2001
## [799] 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001
## [813] 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001
## [827] 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 2000
## [841] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000
## [855] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000
## [869] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000
## [883] 2000 2000 2000 2000 2000 2000 2000 2000 2000 1999 1999 1999 1999 1999
## [897] 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999
## [911] 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999
## [925] 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999
## [939] 1999 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998
## [953] 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998
## [967] 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998
## [981] 1998 1998 1998 1998 1998 1998 1997 1997 1997 1997 1997 1997 1997 1997
## [995] 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997
## [1009] 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997
## [1023] 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 1996 1996 1996 1996
## [1037] 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996
## [1051] 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996
## [1065] 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996
## [1079] 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 1995 1995 1995 1995
## [1093] 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995
## [1107] 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995
## [1121] 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995
## [1135] 1995 1995 1995 1995 1995 1995 1995 1994 1994 1994 1994 1994 1994 1994
## [1149] 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994
## [1163] 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994
## [1177] 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994 1993 1993 1993 1993
## [1191] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993
## [1205] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993
## [1219] 1993 1993 1993 1993 1993 1993 1993 1992 1992 1992 1992 1992 1992 1992
## [1233] 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992
## [1247] 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992
## [1261] 1992 1992 1992 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991
## [1275] 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991
## [1289] 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991 1991
## [1303] 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990
## [1317] 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990
## [1331] 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990
## [1345] 1990 1989 1989 1989 1989 1989 1989 1989 1989 1989 1989 1989 1989 1989
## [1359] 1989 1989 1989 1989 1989 1989 1989 1989 1989 1989 1989 1989 1989 1989
## [1373] 1989 1989 1989 1989 1989 1989 1989 1989 1988 1988 1988 1988 1988 1988
## [1387] 1988 1988 1988 1988 1988 1988 1988 1988 1988 1988 1988 1988 1988 1988
## [1401] 1988 1988 1988 1988 1988 1988 1988 1988 1988 1988 1988 1988 1988 1988
## [1415] 1988 1988 1988 1987 1987 1987 1987 1987 1987 1987 1987 1987 1987 1987
## [1429] 1987 1987 1987 1987 1987 1987 1987 1987 1987 1987 1987 1987 1987 1987
## [1443] 1987 1987 1987 1987 1987 1987 1987 1987 1987 1987 1986 1986 1986 1986
## [1457] 1986 1986 1986 1986 1986 1986 1986 1986 1986 1986 1986 1986 1986 1986
## [1471] 1986 1986 1986 1986 1986 1986 1986 1986 1986 1986 1986 1986 1986 1986
## [1485] 1986 1986 1986 1986 1986 1986 1985 1985 1985 1985 1985 1985 1985 1985
## [1499] 1985 1985 1985 1985 1985 1985 1985 1985 1985 1985 1985 1985 1985 1985
## [1513] 1985 1985 1985 1985 1985 1985 1985 1985 1985 1985 1985 1985 1985 1985
## [1527] 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984
## [1541] 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984 1984
## [1555] 1984 1984 1984 1984 1984 1983 1983 1983 1983 1983 1983 1983 1983 1983
## [1569] 1983 1983 1983 1983 1983 1983 1983 1983 1983 1983 1983 1983 1983 1983
## [1583] 1983 1983 1983 1983 1983 1983 1983 1983 1982 1982 1982 1982 1982 1982
## [1597] 1982 1982 1982 1982 1982 1982 1982 1982 1982 1982 1982 1982 1982 1982
## [1611] 1982 1982 1982 1982 1982 1982 1982 1982 1982 1982 1982 1982 1982 1981
## [1625] 1981 1981 1981 1981 1981 1981 1981 1981 1981 1981 1981 1981 1981 1981
## [1639] 1981 1981 1981 1981 1981 1981 1981 1981 1981 1981 1981 1981 1981 1981
## [1653] 1981 1981 1981 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980
## [1667] 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980
## [1681] 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980 1980
## [1695] 1980 1980 1979 1979 1979 1979 1979 1979 1979 1979 1979 1979 1979 1979
## [1709] 1979 1979 1979 1979 1979 1979 1979 1979 1979 1979 1979 1979 1979 1979
## [1723] 1979 1979 1979 1979 1979 1979 1979 1979 1978 1978 1978 1978 1978 1978
## [1737] 1978 1978 1978 1978 1978 1978 1978 1978 1978 1978 1978 1978 1978 1978
## [1751] 1978 1978 1978 1978 1978 1978 1978 1978 1978 1978 1978 1977 1977 1977
## [1765] 1977 1977 1977 1977 1977 1977 1977 1977 1977 1977 1977 1977 1977 1977
## [1779] 1977 1977 1977 1977 1977 1977 1977 1977 1977 1977 1977 1977 1977 1977
## [1793] 1977 1977 1977 1977 1977 1977 1977 1976 1976 1976 1976 1976 1976 1976
## [1807] 1976 1976 1976 1976 1976 1976 1976 1976 1976 1976 1976 1976 1976 1976
## [1821] 1976 1976 1976 1976 1976 1976 1976 1976 1976 1976 1976 1975 1975 1975
## [1835] 1975 1975 1975 1975 1975 1975 1975 1975 1975 1975 1975 1975 1975 1975
## [1849] 1975 1975 1975 1975 1975 1975 1975 1975 1975 1975 1975 1975 1975 1975
## [1863] 1975 1975 1975 1975 1975 1975 1974 1974 1974 1974 1974 1974 1974 1974
## [1877] 1974 1974 1974 1974 1974 1974 1974 1974 1974 1974 1974 1974 1974 1974
## [1891] 1974 1974 1974 1974 1974 1974 1974 1974 1974 1974 1974 1974 1974 1974
## [1905] 1974 1973 1973 1973 1973 1973 1973 1973 1973 1973 1973 1973 1973 1973
## [1919] 1973 1973 1973 1973 1973 1973 1973 1973 1973 1973 1973 1973 1973 1973
## [1933] 1973 1973 1973 1973 1973 1972 1972 1972 1972 1972 1972 1972 1972 1972
## [1947] 1972 1972 1972 1972 1972 1972 1972 1972 1972 1972 1972 1972 1972 1972
## [1961] 1972 1972 1972 1972 1972 1972 1972 1972 1972 1972 1972 1972 1972 1972
## [1975] 1972 1971 1971 1971 1971 1971 1971 1971 1971 1971 1971 1971 1971 1971
## [1989] 1971 1971 1971 1971 1971 1971 1971 1971 1971 1971 1971 1971 1971 1971
## [2003] 1971 1971 1971 1971 1971 1971 1971 1971 1971 1970 1970 1970 1970 1970
## [2017] 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970
## [2031] 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970 1970
## [2045] 1970 1970 1970 1970 1970 1970 1970 1969 1969 1969 1969 1969 1969 1969
## [2059] 1969 1969 1969 1969 1969 1969 1969 1969 1969 1969 1969 1969 1969 1969
## [2073] 1969 1969 1969 1969 1969 1969 1969 1969 1969 1969 1969 1969 1969 1969
## [2087] 1969 1969 1969 1969 1969 1968 1968 1968 1968 1968 1968 1968 1968 1968
## [2101] 1968 1968 1968 1968 1968 1968 1968 1968 1968 1968 1968 1968 1968 1968
## [2115] 1968 1968 1968 1968 1968 1968 1968 1968 1968 1968 1968 1968 1968 1967
## [2129] 1967 1967 1967 1967 1967 1967 1967 1967 1967 1967 1967 1967 1967 1967
## [2143] 1967 1967 1967 1967 1967 1967 1967 1967 1967 1967 1967 1967 1967 1967
## [2157] 1967 1967 1967 1967 1967 1967 1967 1967 1967 1967 1966 1966 1966 1966
## [2171] 1966 1966 1966 1966 1966 1966 1966 1966 1966 1966 1966 1966 1966 1966
## [2185] 1966 1966 1966 1966 1966 1966 1966 1966 1966 1966 1966 1966 1966 1966
## [2199] 1966 1966 1966 1966 1966 1966 1966 1966 1965 1965 1965 1965 1965 1965
## [2213] 1965 1965 1965 1965 1965 1965 1965 1965 1965 1965 1965 1965 1965 1965
## [2227] 1965 1965 1965 1965 1965 1965 1965 1965 1965 1965 1964 1964 1964 1964
## [2241] 1964 1964 1964 1964 1964 1964 1964 1964 1964 1964 1964 1964 1964 1964
## [2255] 1964 1964 1964 1964 1964 1964 1964 1964 1964 1964 1964 1964 1964 1964
## [2269] 1963 1963 1963 1963 1963 1963 1963 1963 1963 1963 1963 1963 1963 1963
## [2283] 1963 1963 1963 1963 1963 1963 1963 1963 1963 1963 1963 1963 1963 1963
## [2297] 1963 1963 1963 1962 1962 1962 1962 1962 1962 1962 1962 1962 1962 1962
## [2311] 1962 1962 1962 1962 1962 1962 1962 1962 1962 1962 1962 1962 1962 1962
## [2325] 1962 1962 1962 1962 1962 1962 1962 1962 1961 1961 1961 1961 1961 1961
## [2339] 1961 1961 1961 1961 1961 1961 1961 1961 1961 1961 1961 1961 1961 1961
## [2353] 1961 1961 1961 1961 1961 1961 1961 1961 1961 1961 1961 1961 1960 1960
## [2367] 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960
## [2381] 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960
## [2395] 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 1959 1959
## [2409] 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959
## [2423] 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959
## [2437] 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959 1959 1958 1958
## [2451] 1958 1958 1958 1958 1958 1958 1958 1958 1958 1958 1958 1958 1958 1958
## [2465] 1958 1958 1958 1958 1958 1958 1958 1958 1958 1958 1958 1958 1958 1958
## [2479] 1958 1958 1958 1958 1958 1957 1957 1957 1957 1957 1957 1957 1957 1957
## [2493] 1957 1957 1957 1957 1957 1957 1957 1957 1957 1957 1957 1957 1957 1957
## [2507] 1957 1957 1957 1957 1957 1957 1957 1957 1957 1957 1957 1957 1957 1956
## [2521] 1956 1956 1956 1956 1956 1956 1956 1956 1956 1956 1956 1956 1956 1956
## [2535] 1956 1956 1956 1956 1956 1956 1956 1956 1956 1956 1956 1956 1956 1956
## [2549] 1956 1956 1956 1956 1956 1955 1955 1955 1955 1955 1955 1955 1955 1955
## [2563] 1955 1955 1955 1955 1955 1955 1955 1955 1955 1955 1955 1955 1955 1955
## [2577] 1955 1955 1955 1955 1955 1955 1955 1955 1955 1955 1955 1955 1955 1955
## [2591] 1955 1955 1955 1954 1954 1954 1954 1954 1954 1954 1954 1954 1954 1954
## [2605] 1954 1954 1954 1954 1954 1954 1954 1954 1954 1954 1954 1954 1954 1954
## [2619] 1954 1954 1954 1954 1954 1954 1954 1954 1954 1954 1954 1953 1953 1953
## [2633] 1953 1953 1953 1953 1953 1953 1953 1953 1953 1953 1953 1953 1953 1953
## [2647] 1953 1953 1953 1953 1953 1953 1953 1953 1953 1953 1953 1953 1953 1953
## [2661] 1953 1953 1953 1953 1953 1953 1952 1952 1952 1952 1952 1952 1952 1952
## [2675] 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952
## [2689] 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952
## [2703] 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952 1952 1951 1951
## [2717] 1951 1951 1951 1951 1951 1951 1951 1951 1951 1951 1951 1951 1951 1951
## [2731] 1951 1951 1951 1951 1951 1951 1951 1951 1951 1951 1951 1951 1951 1951
## [2745] 1951 1951 1951 1951 1951 1951 1951 1951 1950 1950 1950 1950 1950 1950
## [2759] 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950
## [2773] 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950
## [2787] 1950 1950 1950 1950 1950 1950 1950 1949 1949 1949 1949 1949 1949 1949
## [2801] 1949 1949 1949 1949 1949 1949 1949 1949 1949 1949 1949 1949 1949 1949
## [2815] 1949 1949 1949 1949 1949 1949 1949 1949 1949 1949 1949 1949 1949 1949
## [2829] 1948 1948 1948 1948 1948 1948 1948 1948 1948 1948 1948 1948 1948 1948
## [2843] 1948 1948 1948 1948 1948 1948 1948 1948 1948 1948 1948 1948 1948 1948
## [2857] 1948 1948 1948 1948 1948 1948 1947 1947 1947 1947 1947 1947 1947 1947
## [2871] 1947 1947 1947 1947 1947 1947 1947 1947 1947 1947 1947 1947 1947 1947
## [2885] 1947 1947 1947 1947 1947 1947 1947 1947 1947 1947 1947 1947 1947 1947
## [2899] 1947 1947 1947 1947 1947 1946 1946 1946 1946 1946 1946 1946 1946 1946
## [2913] 1946 1946 1946 1946 1946 1946 1946 1946 1946 1946 1946 1946 1946 1946
## [2927] 1946 1946 1946 1946 1946 1946 1946 1946 1946 1946 1946 1946 1946 1946
## [2941] 1946 1946 1945 1945 1945 1945 1945 1945 1945 1945 1945 1945 1945 1945
## [2955] 1945 1945 1945 1945 1945 1945 1945 1945 1945 1945 1945 1945 1945 1945
## [2969] 1945 1945 1945 1945 1945 1945 1945 1945 1945 1945 1945 1944 1944 1944
## [2983] 1944 1944 1944 1944 1944 1944 1944 1944 1944 1944 1944 1944 1944 1944
## [2997] 1944 1944 1944 1944 1944 1944 1944 1944 1944 1944 1944 1944 1944 1944
## [3011] 1944 1944 1943 1943 1943 1943 1943 1943 1943 1943 1943 1943 1943 1943
## [3025] 1943 1943 1943 1943 1943 1943 1943 1943 1943 1943 1943 1943 1943 1943
## [3039] 1943 1943 1943 1943 1943 1942 1942 1942 1942 1942 1942 1942 1942 1942
## [3053] 1942 1942 1942 1942 1942 1942 1942 1942 1942 1942 1942 1942 1942 1942
## [3067] 1942 1942 1942 1941 1941 1941 1941 1941 1941 1941 1941 1941 1941 1941
## [3081] 1941 1941 1941 1941 1941 1941 1941 1941 1941 1941 1941 1941 1941 1941
## [3095] 1941 1941 1941 1941 1941 1941 1940 1940 1940 1940 1940 1940 1940 1940
## [3109] 1940 1940 1940 1940 1940 1940 1940 1940 1940 1940 1940 1940 1940 1940
## [3123] 1940 1940 1940 1940 1940 1940 1940 1940 1940 1940 1940 1939 1939 1939
## [3137] 1939 1939 1939 1939 1939 1939 1939 1939 1939 1939 1939 1939 1939 1939
## [3151] 1939 1939 1939 1939 1939 1939 1939 1939 1939 1939 1939 1939 1938 1938
## [3165] 1938 1938 1938 1938 1938 1938 1938 1938 1938 1938 1938 1938 1938 1938
## [3179] 1938 1938 1938 1938 1938 1938 1938 1938 1938 1938 1938 1938 1938 1938
## [3193] 1937 1937 1937 1937 1937 1937 1937 1937 1937 1937 1937 1937 1937 1937
## [3207] 1937 1937 1937 1937 1937 1937 1937 1937 1937 1937 1937 1937 1937 1937
## [3221] 1937 1937 1936 1936 1936 1936 1936 1936 1936 1936 1936 1936 1936 1936
## [3235] 1936 1936 1936 1936 1936 1936 1936 1936 1936 1936 1936 1936 1936 1936
## [3249] 1936 1936 1936 1936 1936 1935 1935 1935 1935 1935 1935 1935 1935 1935
## [3263] 1935 1935 1935 1935 1935 1935 1935 1935 1935 1935 1935 1935 1935 1935
## [3277] 1935 1935 1935 1935 1935 1935 1935 1935 1935 1935 1934 1934 1934 1934
## [3291] 1934 1934 1934 1934 1934 1934 1934 1934 1934 1934 1934 1934 1934 1934
## [3305] 1934 1934 1934 1934 1934 1934 1934 1934 1934 1934 1934 1934 1934 1934
## [3319] 1934 1934 1933 1933 1933 1933 1933 1933 1933 1933 1933 1933 1933 1933
## [3333] 1933 1933 1933 1933 1933 1933 1933 1933 1933 1933 1933 1933 1933 1933
## [3347] 1933 1933 1933 1933 1933 1933 1933 1933 1933 1933 1932 1932 1932 1932
## [3361] 1932 1932 1932 1932 1932 1932 1932 1932 1932 1932 1932 1932 1932 1932
## [3375] 1932 1932 1932 1932 1932 1932 1932 1932 1932 1932 1932 1932 1932 1932
## [3389] 1932 1932 1932 1932 1932 1932 1932 1932 1932 1931 1931 1931 1931 1931
## [3403] 1931 1931 1931 1931 1931 1931 1931 1931 1931 1931 1931 1931 1931 1931
## [3417] 1931 1931 1931 1931 1931 1931 1931 1931 1931 1931 1931 1931 1931 1931
## [3431] 1931 1931 1931 1931 1930 1930 1930 1930 1930 1930 1930 1930 1930 1930
## [3445] 1930 1930 1930 1930 1930 1930 1930 1930 1930 1930 1930 1930 1930 1930
## [3459] 1930 1930 1930 1930 1930 1930 1930 1930 1930 1930 1929 1929 1929 1929
## [3473] 1929 1929 1929 1929 1929 1929 1929 1929 1929 1929 1929 1929 1929 1929
## [3487] 1929 1929 1929 1929 1929 1929 1929 1929 1929 1929 1929 1929 1929 1929
## [3501] 1929 1929 1929 1928 1928 1928 1928 1928 1928 1928 1928 1928 1928 1928
## [3515] 1928 1928 1928 1928 1928 1928 1928 1928 1928 1928 1928 1928 1928 1928
## [3529] 1928 1928 1928 1928 1928 1928 1928 1928 1928 1928 1927 1927 1927 1927
## [3543] 1927 1927 1927 1927 1927 1927 1927 1927 1927 1927 1927 1927 1927 1927
## [3557] 1927 1927 1927 1927 1927 1927 1927 1927 1927 1927 1927 1927 1927 1927
## [3571] 1927 1927 1927 1927 1927 1926 1926 1926 1926 1926 1926 1926 1926 1926
## [3585] 1926 1926 1926 1926 1926 1926 1926 1926 1926 1926 1926 1926 1926 1926
## [3599] 1926 1926 1926 1926 1926 1926 1926 1926 1926 1926 1926 1926 1926 1926
## [3613] 1926 1926 1926 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925
## [3627] 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925
## [3641] 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925 1925
## [3655] 1925 1925 1924 1924 1924 1924 1924 1924 1924 1924 1924 1924 1924 1924
## [3669] 1924 1924 1924 1924 1924 1924 1924 1924 1924 1924 1924 1924 1924 1924
## [3683] 1924 1924 1924 1924 1924 1924 1924 1923 1923 1923 1923 1923 1923 1923
## [3697] 1923 1923 1923 1923 1923 1923 1923 1923 1923 1923 1923 1923 1923 1923
## [3711] 1923 1923 1923 1923 1923 1923 1923 1923 1923 1922 1922 1922 1922 1922
## [3725] 1922 1922 1922 1922 1922 1922 1922 1922 1922 1922 1922 1922 1922 1922
## [3739] 1922 1922 1922 1922 1922 1922 1922 1922 1922 1922 1922 1921 1921 1921
## [3753] 1921 1921 1921 1921 1921 1921 1921 1921 1921 1921 1921 1921 1921 1921
## [3767] 1921 1921 1921 1921 1921 1921 1921 1921 1921 1921 1920 1920 1920 1920
## [3781] 1920 1920 1920 1920 1920 1920 1920 1920 1920 1920 1920 1920 1920 1920
## [3795] 1920 1920 1920 1920 1920 1920 1920 1920 1920 1920 1920 1920 1920 1920
## [3809] 1919 1919 1919 1919 1919 1919 1919 1919 1919 1919 1919 1919 1919 1919
## [3823] 1919 1919 1919 1919 1919 1919 1919 1919 1919 1919 1919 1919 1919 1919
## [3837] 1919 1919 1918 1918 1918 1918 1918 1918 1918 1918 1918 1918 1918 1918
## [3851] 1918 1918 1918 1918 1918 1918 1918 1918 1918 1918 1918 1918 1918 1918
## [3865] 1918 1918 1918 1918 1918 1918 1917 1917 1917 1917 1917 1917 1917 1917
## [3879] 1917 1917 1917 1917 1917 1917 1917 1917 1917 1917 1917 1917 1917 1917
## [3893] 1917 1917 1917 1917 1917 1916 1916 1916 1916 1916 1916 1916 1916 1916
## [3907] 1916 1916 1916 1916 1916 1916 1916 1916 1916 1916 1916 1916 1916 1916
## [3921] 1916 1916 1916 1916 1916 1916 1915 1915 1915 1915 1915 1915 1915 1915
## [3935] 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915
## [3949] 1915 1915 1915 1915 1915 1915 1915 1915 1914 1914 1914 1914 1914 1914
## [3963] 1914 1914 1914 1914 1914 1914 1914 1914 1914 1914 1914 1914 1914 1914
## [3977] 1914 1914 1914 1914 1914 1914 1914 1914 1914 1914 1914 1914 1913 1913
## [3991] 1913 1913 1913 1913 1913 1913 1913 1913 1913 1913 1913 1913 1913 1913
## [4005] 1913 1913 1913 1913 1913 1913 1913 1913 1913 1913 1913 1913 1913 1912
## [4019] 1912 1912 1912 1912 1912 1912 1912 1912 1912 1912 1912 1912 1912 1912
## [4033] 1912 1912 1912 1912 1912 1912 1912 1912 1912 1912 1912 1912 1911 1911
## [4047] 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911
## [4061] 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911
## [4075] 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1911 1910
## [4089] 1910 1910 1910 1910 1910 1910 1910 1910 1910 1910 1910 1910 1910 1910
## [4103] 1910 1910 1910 1910 1910 1910 1910 1910 1910 1910 1910 1910 1910 1910
## [4117] 1910 1910 1910 1910 1910 1910 1909 1909 1909 1909 1909 1909 1909 1909
## [4131] 1909 1909 1909 1909 1909 1909 1909 1909 1909 1909 1909 1909 1909 1909
## [4145] 1909 1909 1909 1909 1909 1909 1909 1909 1909 1909 1909 1909 1909 1909
## [4159] 1909 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908
## [4173] 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908
## [4187] 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908 1908 1907 1907
## [4201] 1907 1907 1907 1907 1907 1907 1907 1907 1907 1907 1907 1907 1907 1907
## [4215] 1907 1907 1907 1907 1907 1907 1907 1907 1907 1907 1907 1907 1907 1907
## [4229] 1907 1907 1907 1907 1907 1906 1906 1906 1906 1906 1906 1906 1906 1906
## [4243] 1906 1906 1906 1906 1906 1906 1906 1906 1906 1906 1906 1906 1906 1906
## [4257] 1906 1906 1906 1906 1906 1906 1906 1906 1905 1905 1905 1905 1905 1905
## [4271] 1905 1905 1905 1905 1905 1905 1905 1905 1905 1905 1905 1905 1905 1905
## [4285] 1905 1905 1905 1905 1905 1905 1905 1904 1904 1904 1904 1904 1904 1904
## [4299] 1904 1904 1904 1904 1904 1904 1904 1904 1904 1904 1904 1903 1903 1903
## [4313] 1903 1903 1903 1903 1903 1903 1903 1903 1903 1903 1903 1903 1903 1903
## [4327] 1903 1903 1903 1902 1902 1902 1902 1902 1902 1902 1902 1902 1902 1902
## [4341] 1902 1902 1902 1902 1902 1902 1902 1902 1902 1902 1902 1902 1902 1901
## [4355] 1901 1901 1901 1901 1901 1901 1901 1901 1901 1901 1901 1901 1901 1901
## [4369] 1901 1901 1901 1901 1901 1901 1901 1901 1901
## 147 Levels: 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 ... 2017
## [1] "pathway" "brite" "module" "ko" "genome" "vg"
## [7] "ag" "compound" "glycan" "reaction" "rclass" "enzyme"
## [13] "disease" "drug" "dgroup" "environ" "genes" "ligand"
## [19] "kegg"
paths <- 'brain' # We want brain diseases
# Search the KEGG "disease" database for "brain", keep the names of the hits
# (presumably the KEGG entry ids -- verify) and request the full records
found.kegg <- keggFind("disease", paths) %>%
  as.matrix() %>%
  rownames() %>% # (a bit of formatting)
  keggGet()
# TODO: how to work around the KEGG API limits

# Extract the "ENTRY" field of every record. map() given a string extracts the
# element of that name; the original pluck('ENTRY') evaluated to that same string.
disease.names <- map(found.kegg, "ENTRY") %>%
  as.character() # stored as a character vector

# One row per element of each disease's GENE field
brain.disease.genes <- map(found.kegg, "GENE") %>%
  set_names(disease.names) %>%
  enframe(name = "Disease", value = "Genes") %>%
  unnest_longer(Genes)

# One row per element of each disease's PATHWAY field
brain.disease.pathways <- map(found.kegg, "PATHWAY") %>%
  set_names(disease.names) %>%
  enframe(name = "Disease", value = "PATHWAY") %>%
  unnest_longer(PATHWAY)

# Builds a summary table of genes and diseases
brain_diseases <- full_join(
  brain.disease.pathways,
  brain.disease.genes,
  by = 'Disease'
)

# Count distinct genes and pathway ids per disease.
# PATHWAY_id is presumably the index column that unnest_longer() adds from the
# names of the PATHWAY vectors -- verify against the actual KEGG records.
brain_diseases_df <- brain_diseases %>%
  group_by(Disease) %>%
  summarise(
    Gene_count = n_distinct(Genes),
    PATHWAY_id_count = n_distinct(PATHWAY_id)
  )

# Keep only the digits of the "HSA:<digits>" token found in each Genes entry
brain_diseases.entrezgene_id <- brain_diseases$Genes %>%
  str_extract('HSA:\\d+') %>%
  str_extract('\\d+')

library(biomaRt)
ensembl <- useMart("ensembl") # for the whole database
Hs.ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl") # human only
# This assignment had been swallowed by the trailing comment of the previous
# line, leaving my.genes undefined when getBM() was called.
my.genes <- brain_diseases.entrezgene_id
# Query the requested attributes for the genes in my.genes, filtering by
# entrezgene_id
my.info <- getBM(
  attributes = c('name_1006', 'hgnc_symbol', 'entrezgene_id'),
  filters = 'entrezgene_id',
  values = my.genes,
  mart = Hs.ensembl
)
my.info
FONDECYT Postdoctoral Fellow, Universidad de Chile, deepen.data@gmail.com↩︎
Pregrado, Universidad de Chile↩︎