Abstracts
# Build a single regex that matches any English stop word as a whole word and
# strip those words from each abstract; the cleaned text is stored in
# `tip03$abstract1` so the original `abstract` column is left untouched.
#
# Longer entries are placed first in the alternation: regex alternatives are
# tried left to right, so with the list's native order a short word such as
# "i" would win over a contraction such as "i'm" (if the list contains it)
# and leave a stray "'m" behind in the text.
stopword_terms <- stopwords("en")
stopword_terms <- stopword_terms[
  order(nchar(stopword_terms), decreasing = TRUE)
]
stopwords_regex <- paste0(
  "\\b",
  paste(stopword_terms, collapse = "\\b|\\b"),
  "\\b"
)
# Match case-insensitively: the n-gram tokenizer used downstream lowercases
# its output by default, so a capitalised stop word ("The", "In") that is not
# removed here would still show up in the n-grams as "the" / "in".
tip03$abstract1 <- stringr::str_replace_all(
  tip03$abstract,
  stringr::regex(stopwords_regex, ignore_case = TRUE),
  ""
)
# Count every three-word sequence found in the stop-word-stripped abstracts
# (`abstract1`), most frequent first, and show the result as an interactive
# table.
trigram <- tip03 %>%
  unnest_tokens(trigram, abstract1, token = "ngrams", n = 3) %>%
  # Abstracts with fewer than three tokens (or a missing abstract) produce an
  # NA n-gram; drop those so NA is not counted as if it were a real trigram.
  dplyr::filter(!is.na(trigram)) %>%
  count(trigram, sort = TRUE)
# NOTE(review): DT warns that this table is large for client-side rendering;
# consider server-side processing if the rendered document becomes sluggish.
datatable(trigram)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
# Count every four-word sequence found in the stop-word-stripped abstracts
# (`abstract1`), most frequent first, and show the result as an interactive
# table.
#
# The n-gram column keeps its historical name `trigram` so that any later code
# reading `tetragram$trigram` continues to work, even though it holds
# four-word sequences; only the displayed header is corrected below.
tetragram <- tip03 %>%
  unnest_tokens(trigram, abstract1, token = "ngrams", n = 4) %>%
  # Abstracts with fewer than four tokens (or a missing abstract) produce an
  # NA n-gram; drop those so NA is not counted as if it were a real 4-gram.
  dplyr::filter(!is.na(trigram)) %>%
  count(trigram, sort = TRUE)
# Relabel the header for display only ("new name" = "existing column").
# NOTE(review): DT warns that this table is large for client-side rendering;
# consider server-side processing if the rendered document becomes sluggish.
datatable(tetragram, colnames = c("tetragram" = "trigram"))
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html