library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RISmed)
# Install wordcloud if it is not available yet, then attach it once.
# requireNamespace() only checks that the package can be loaded; unlike
# require(), it does not attach it, so library() below is the single attach.
if (!requireNamespace("wordcloud", quietly = TRUE)) {
  install.packages("wordcloud")
}
library(wordcloud)
## Loading required package: RColorBrewer
library(tidytext)
library(tidyr)
# Search PubMed for the data-science / public-health query. Hits are limited
# to dates 2000 through 2016 (date type "pdat") and capped at 5000 records.
res1 <- EUtilsSummary(
  "data + science, public + health",
  db = "pubmed",
  type = "esearch",
  datetype = "pdat",
  retmax = 5000,
  mindate = 2000,
  maxdate = 2016
)

# Show the search summary; the recorded output is the expanded query string.
res1
## [1] "((\"EPJ Data Sci\"[Journal] OR (\"data\"[All Fields] AND \"science\"[All Fields]) OR \"data science\"[All Fields]) AND (\"public health\"[MeSH Terms] OR (\"public\"[All Fields] AND \"health\"[All Fields]) OR \"public health\"[All Fields])) AND 2000[PDAT] : 2016[PDAT]"

# Download the records for the hits found by the search above.
fetch <- EUtilsGet(res1, db = "pubmed", type = "efetch")
## Warning in Medline(Result, query): NAs introduced by coercion
## Warning in Medline(Result, query): NAs introduced by coercion
# Take the article titles from the fetched records as a plain character
# vector, then wrap them in a one-column data frame (column `titles`) ready
# for tokenizing. stringsAsFactors = FALSE keeps that column character.
titles <- as.character(fetch@ArticleTitle)
tidyabst <- data.frame(titles = titles, stringsAsFactors = FALSE)
# Tokenize the titles into single words, drop every word listed in
# `stop_words`, and tally the remaining words, most frequent first.
# The join key is spelled out so the match does not depend on whichever
# column names the two tables happen to share.
cloud <- tidyabst %>%
  unnest_tokens(word, titles) %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)

# Plot the word cloud from the `word` and `n` columns: only words counted at
# least 10 times, at most 100 words, colored with the 8-color "Dark2" palette.
cloud %>%
  with(wordcloud(word, n, min.freq = 10, max.words = 100,
                 colors = brewer.pal(8, "Dark2")))

# Tokenize the titles into two-word sequences (bigrams) and tally each
# distinct bigram, most frequent first. The frequency lands in column `n`.
ds_bigrams <- tidyabst %>%
  unnest_tokens(bigrams, titles, token = "ngrams", n = 2) %>%
  count(bigrams, sort = TRUE)

# Print the tally; the lines below are the output recorded with the script.
ds_bigrams
## # A tibble: 0 × 2
## # ... with 2 variables: bigrams <chr>, n <int>
# Split each bigram at the space into its first and second word.
bigrams_separated <- separate(
  ds_bigrams,
  bigrams,
  c("word1", "word2"),
  sep = " "
)

# Keep only the bigrams in which neither word is a stop word.
bigrams_filtered <- bigrams_separated %>%
  filter(!word1 %in% stop_words$word, !word2 %in% stop_words$word)

# Inspect the bigrams whose first word begins with "0".
g <- bigrams_filtered %>%
  filter(grepl("^0", word1))
g
## # A tibble: 0 × 3
## # ... with 3 variables: word1 <chr>, word2 <chr>, n <int>
# Rank the filtered bigrams by frequency, skipping those whose first word is
# exactly "0". `bigrams_filtered` is already aggregated: each row carries its
# frequency in `n`, so the tally is weighted by `n`. An unweighted count()
# would only count rows. sort = TRUE orders by the weighted total directly,
# so nothing depends on the automatically chosen name of the count column.
bigrams_filtered %>%
  filter(word1 != "0") %>%
  count(word1, word2, wt = n, sort = TRUE)
# Glue the two words of each filtered bigram back into a single `bigram`
# column, separated by a space.
bigrams_united <- bigrams_filtered %>%
  unite(bigram, word1, word2, sep = " ")

# Rank the re-united bigrams by frequency. The rows still carry the
# frequency computed earlier in `n`, so the tally is weighted by `n` rather
# than counting rows.
bigrams_united %>%
  count(bigram, wt = n, sort = TRUE)