CRAN Keywords

library(htmltab)
library(DT)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
# Scrape CRAN's "available packages by date" listing into a data frame.
url <- "https://cran.r-project.org/web/packages/available_packages_by_date.html"
# Ask for the first table explicitly instead of relying on htmltab's fallback,
# which picks the same table but has to report that it guessed.
cran <- htmltab(doc = url, which = 1)
## No encoding supplied: defaulting to UTF-8.
# The rest of the script reads the Date and Title columns; stop right away if
# the page layout ever changes and they are missing.
stopifnot(all(c("Date", "Title") %in% names(cran)))
dim(cran)
## [1] 17753     3
names(cran)
## [1] "Date"    "Package" "Title"
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Parse the complete release date. With format = "%Y" alone, strptime fills in
# today's month and day, so the result is not the real release date and turns
# into NA for non-leap years whenever the script runs on 29 February.
# Assumes the Date column holds YYYY-MM-DD strings, as on the CRAN page.
cran$year1 <- as.Date(as.character(cran$Date), format = "%Y-%m-%d")
# Release year, used for the tabulation here and for subsetting further down.
cran$year <- year(cran$year1)
# Number of packages per release year.
table(cran$year)
## 
## 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 
##    2    1    6   19   21   40  304  371  503  730 1018 1170 1717 2191 4689 4971
library(tidytext)

### ALL
# Word frequencies over all package titles.
# Split each title into one word per row.
cran1 <- cran %>%
  unnest_tokens(word, Title)
# Remove the words listed in tidytext's stop_words table. The join key is
# spelled out so dplyr does not have to guess it and report the guess.
data(stop_words)
cran2 <- cran1 %>%
  anti_join(stop_words, by = "word")
# Count the remaining words, most frequent first.
cran3 <- cran2 %>%
  count(word, sort = TRUE)

library(DT)
# Interactive, searchable table of the counts.
datatable(cran3)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
### 2017 and after
# Word frequencies for packages released in 2017 or later.
# NOTE: this overwrites `cran` with the subset; the code that follows keeps
# working on the narrowed data.
cran <- subset(cran, year > 2016)
# Split each title into one word per row.
cran1 <- cran %>%
  unnest_tokens(word, Title)
# Remove the words listed in tidytext's stop_words table. The join key is
# spelled out so dplyr does not have to guess it and report the guess.
data(stop_words)
cran2 <- cran1 %>%
  anti_join(stop_words, by = "word")
# Count the remaining words, most frequent first.
cran3 <- cran2 %>%
  count(word, sort = TRUE)

library(DT)
# Interactive, searchable table of the counts.
datatable(cran3)
### 2017 and after (bigram)

# Bigram frequencies of package titles. `cran` was narrowed to year > 2016
# earlier in the script, so these counts cover 2017 onward.
bigrams <- cran %>%
  unnest_tokens(bigram, Title, token = "ngrams", n = 2)
# Titles with fewer than two words can come back as an NA bigram; leave those
# out so NA is not ranked as if it were a real term.
bigrams2 <- bigrams %>%
  filter(!is.na(bigram)) %>%
  count(bigram, sort = TRUE)
# Interactive, searchable table of the counts.
datatable(bigrams2)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html