Get KEGG pathway gene set
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(tidyverse)
#==== get KEGG pathway geneset ====
# Read the library file directly and keep the lines that mention "autophagy".
# This replaces the `grep ... | pbcopy` + clipboard round trip, which only
# works on macOS and overwrites whatever the user had on the clipboard.
kegg_library <-
  "/Users/zero/Documents/_09_data/enrichr_librarys/geneSetLibrary_KEGG_2016"
raw_gene <- readLines(kegg_library) %>%
  str_subset(fixed("autophagy"))
if (length(raw_gene) == 0) {
  stop("No 'autophagy' gene set found in ", kegg_library, call. = FALSE)
}
# Only the first matching line is parsed, as before.
# NOTE(review): assumes tab-separated fields in which every gene is written as
# "SYMBOL,1.0" (this is what the original string replacements imply) -- confirm
# against the library file.
# Splitting on tabs *before* stripping the weight keeps the term name in its
# own field, so the first gene is no longer glued to the term and discarded,
# and the literal ",1.0" suffix is matched exactly instead of via the regex
# "1.0" (where "." matches any character).
gene <-
  raw_gene[[1]] %>%
  str_split(fixed("\t")) %>%
  .[[1]] %>%
  str_subset(",1\\.0$") %>%
  str_remove(",1\\.0$")
Prepare Search Term
#==== prepare search term ====
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
# One query string per gene: the symbol followed by the literal "[title]" tag
# (presumably a PubMed field tag limiting the match to article titles).
search_gene <- glue("{symbol}[title]", symbol = gene)
library(rentrez)
# Count the PubMed records that match a search term.
#
# term: a single Entrez query string (defaults to "NUMB").
# Returns the `count` element of the `entrez_search()` result.
pubmed_count <- function(term = "NUMB") {
  # Only the hit count is read, so request zero record IDs (retmax = 0) and do
  # not create the server-side web history that `use_history = TRUE` would
  # store without ever being used.
  res <- entrez_search(
    db = "pubmed",
    term = term,
    retmax = 0
  )
  res[["count"]]
}
PubMed Count
#==== pubmed count ====
# Run one PubMed query per search term and flatten the results to a vector.
hit_counts <- unlist(map(search_gene, pubmed_count))

# One row per gene, most-published genes first.
df <- arrange(
  data.frame(
    gene = gene,
    search = search_gene,
    pubmed_count = hit_counts
  ),
  desc(pubmed_count)
)
Add GeneCards summary
#==== genecard ====
library(rvest)
library(curl)
library(robotstxt)
# check path allowed
# Ask robots.txt about the page that is actually requested further down
# (/cgi-bin/carddisp.pl) rather than only the site root, and keep the answer
# so that a disallowed path is reported instead of being silently ignored.
genecards_allowed <-
  paths_allowed("https://www.genecards.org/cgi-bin/carddisp.pl")
print(genecards_allowed)
if (!isTRUE(all(genecards_allowed))) {
  warning(
    "robots.txt does not allow fetching ",
    "https://www.genecards.org/cgi-bin/carddisp.pl",
    call. = FALSE
  )
}
# base url
# One GeneCards card URL per row of `df`, built from the upper-cased symbol.
keyword <- toupper(df[["gene"]])
url <- glue("https://www.genecards.org/cgi-bin/carddisp.pl?gene={keyword}&keywords={keyword}#summaries")
# Fetch the summary text from a single GeneCards page.
#
# page_url: URL of one gene card.
# Returns exactly one string per page: all matched paragraphs joined with a
# space, or NA_character_ when the request fails or the selector matches
# nothing. Always returning length one keeps the summaries aligned with the
# rows of `df`; previously a page with zero or several matching paragraphs
# changed the length of `unlist(sum_list)` and broke (or misaligned) the
# `mutate()` below, and a single failed request aborted the whole loop.
fetch_genecard_summary <- function(page_url) {
  tryCatch(
    {
      handle <- curl::new_handle("useragent" = "Mozilla/5.0")
      paragraphs <- read_html(curl(page_url, handle = handle)) %>%
        html_nodes(".gc-section-header+ .gc-subsection p") %>%
        html_text()
      if (length(paragraphs) == 0) {
        NA_character_
      } else {
        paste(paragraphs, collapse = " ")
      }
    },
    error = function(e) {
      # Report the failure but keep going with the remaining genes.
      warning(
        "GeneCards lookup failed for ", page_url, ": ", conditionMessage(e),
        call. = FALSE
      )
      NA_character_
    }
  )
}

# `sum_list` stays a list (one length-one character element per URL).
sum_list <- map(url, fetch_genecard_summary)
df <- df %>%
  mutate(summary = unlist(sum_list))