Get KEGG pathway gene set

library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(tidyverse)
#==== get KEGG pathway geneset ====
# Read the Enrichr KEGG 2016 library directly rather than shelling out to
# `grep ... | pbcopy` and reading the clipboard back: this works on any OS and
# does not overwrite whatever the user had on the clipboard.
kegg_library <- "/Users/zero/Documents/_09_data/enrichr_librarys/geneSetLibrary_KEGG_2016"
raw_gene <- grep("autophagy",
                 readLines(kegg_library, warn = FALSE),
                 fixed = TRUE,
                 value = TRUE)
if (length(raw_gene) == 0) {
  stop("No line containing 'autophagy' found in ", kegg_library, call. = FALSE)
}

# Only the first matching line is used, as before.
# NOTE(review): assumes each library line is tab-separated, with the term name
# in the first field followed by `SYMBOL,weight` entries (e.g. `ATG5,1.0`) and
# possibly empty fields -- confirm against the file.
# Splitting on tabs *before* stripping anything keeps the first gene separate
# from the term name; the weight is removed with an anchored pattern so that
# the `.` in "1.0" can never match inside a gene symbol.
kegg_fields <- str_split(raw_gene[[1]], fixed("\t"))[[1]]
gene <-
  kegg_fields[-1] %>%
  str_replace(",[0-9.]+$", "") %>%
  str_trim()
gene <- gene[gene != ""]

Prepare Search Term

#==== prepare search term ====
library(glue)
## 
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
## 
##     collapse
# One PubMed query per gene symbol, restricted to the article title field.
search_gene <- glue::glue("{gene}[title]")

library(rentrez)
#' Count PubMed records matching a search term
#'
#' @param term An Entrez query string passed to `entrez_search()`,
#'   e.g. `"NUMB[title]"`.
#' @return The `count` element of the `entrez_search()` result, i.e. the
#'   number of PubMed hits reported for `term`.
pubmed_count <- function(term = "NUMB") {
  # Only the hit count is read, so do not create a server-side web history
  # entry and ask NCBI to return no record IDs.
  res <- entrez_search(db = "pubmed",
                       term = term,
                       retmax = 0)
  res[["count"]]
}

PubMed Count

#==== pubmed count ====
# Query PubMed once per search term, then build the gene table sorted from the
# most-published gene to the least.
hit_counts <- unlist(map(search_gene, pubmed_count))
df <- arrange(
  data.frame(gene = gene,
             search = search_gene,
             pubmed_count = hit_counts),
  desc(pubmed_count)
)

Add GeneCards summary

#==== genecard ====
library(rvest)
library(curl)
library(robotstxt)
# Check robots.txt before scraping; the result is printed so a disallowed
# path is visible in the output.
paths_allowed("https://www.genecards.org")

# One GeneCards page per gene, in the same order as the rows of `df`.
keyword <- df$gene %>% toupper()
url <- glue("https://www.genecards.org/cgi-bin/carddisp.pl?gene={keyword}&keywords={keyword}#summaries")

# Download one GeneCards page and return the text of its summary paragraphs.
# Returns character(0) (with a warning) when the request or parsing fails, so
# one bad gene does not abort the whole run.
fetch_genecard_summary <- function(page_url) {
  tryCatch(
    {
      handle <- curl::new_handle("useragent" = "Mozilla/5.0")
      read_html(curl(page_url, handle = handle)) %>%
        html_nodes(".gc-section-header+ .gc-subsection p") %>%
        html_text()
    },
    error = function(e) {
      warning("GeneCards request failed for ", page_url, ": ",
              conditionMessage(e), call. = FALSE)
      character(0)
    }
  )
}

sum_list <- map(url, fetch_genecard_summary)

# Reduce every page to exactly one string so the new column always lines up
# with the rows of `df`: a page may yield zero or several paragraphs, and a
# plain unlist() would then have the wrong length or shift summaries onto the
# wrong genes.
df <- df %>%
  mutate(summary = map_chr(sum_list, function(paragraphs) {
    if (length(paragraphs) == 0) {
      NA_character_
    } else {
      paste(paragraphs, collapse = " ")
    }
  }))