## Examples for searching for packages

## Example 1 Roc function-------------------
## Some R Packages for ROC Curves
## https://rviews.rstudio.com/2019/03/01/some-r-packages-for-roc-curves/

## How to search CRAN for packages that plot ROC curves,
## and highlights of six useful packages.

## Use Gábor Csárdi's relatively new package pkgsearch to search through CRAN and see what's out there.

## The pkg_search() function takes a text string as input and uses basic text mining techniques to search all of CRAN.

library(tidyverse)  # for data manipulation
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dlstats)    # for package download stats
library(pkgsearch)  # for searching packages


## query for searching a number of interesting ROC-related packages.

## Ask pkgsearch for up to 200 CRAN packages matching "ROC", then preview
## the first six rows of the result.
rocPkg <- pkg_search(
  query = "ROC",
  size = 200
) # 87 packages
head(rocPkg, n = 6)
## # A tibble: 6 x 14
##   score package version title description date                maintainer_name
##   <dbl> <chr>   <pckg_> <chr> <chr>       <dttm>              <chr>          
## 1 9381. pROC    1.16.2  "Dis… "Tools for… 2020-03-19 16:30:07 Xavier Robin   
## 2 4230. caTools 1.18.0  "Too… "Contains … 2020-01-17 08:40:26 Michael Dietze 
## 3 1960. PRROC   1.3.1   "Pre… "Computes … 2018-06-19 09:42:55 Jan Grau       
## 4 1592. surviv… 1.0.3   "Tim… "Compute t… 2013-01-13 19:38:55 Paramita Saha-…
## 5 1510. cvAUC   1.1.0   "Cro… "This pack… 2014-12-09 07:12:38 Erin LeDell    
## 6 1406. precrec 0.11.2  "Cal… "Accurate … 2020-05-28 11:20:02 Takaya Saito   
## # … with 7 more variables: maintainer_email <chr>, revdeps <int>,
## #   downloads_last_month <int>, license <chr>, url <chr>, bugreports <chr>,
## #   package_data <I<list>>
## Narrow the results down by dropping orphaned packages and packages with a score of 190 or less.

## Drop packages whose maintainer is listed as "ORPHANED" or whose search
## score is not above 190, keep only the score / name / monthly-download
## columns, and rank by last month's downloads (highest first).
rocPkgShort <- rocPkg %>%
  filter(score > 190 & maintainer_name != "ORPHANED") %>%
  select(score, package, downloads_last_month) %>%
  arrange(desc(downloads_last_month)) # 55 packages

## Number of packages that survive the filter.
nrow(rocPkgShort)
## [1] 55
head(rocPkgShort, n = 6)
## # A tibble: 6 x 3
##   score package        downloads_last_month
##   <dbl> <chr>                         <int>
## 1 4230. caTools                      157700
## 2 9381. pROC                         117782
## 3  924. ROCR                          77549
## 4 1960. PRROC                         11317
## 5  853. sROC                           5823
## 6  229. riskRegression                 4332
## the chosen ones (after browsing the documentation for the packages)
## Hand-picked ROC packages to compare.
shortList <- c(
  "pROC", "precrec", "ROCit",
  "PRROC", "ROCR", "plotROC"
)

## Fetch the download statistics for the selected packages with dlstats.
downloads <- cran_stats(shortList)

## Peek at the first rows and the column names of the statistics table.
head(downloads, n = 6)
##         start        end downloads package
## 1  2015-01-01 2015-01-31      4617    pROC
## 5  2015-02-01 2015-02-28      4532    pROC
## 9  2015-03-01 2015-03-31      6254    pROC
## 13 2015-04-01 2015-04-30      7285    pROC
## 17 2015-05-01 2015-05-31      9079    pROC
## 21 2015-06-01 2015-06-30      7330    pROC
colnames(downloads)
## [1] "start"     "end"       "downloads" "package"

## Downloads over time, one coloured line per package with a distinct point
## shape, drawn on a log2-transformed y axis.
ggplot(
  data = downloads,
  mapping = aes(x = end, y = downloads, group = package, colour = package)
) +
  geom_line() +
  geom_point(mapping = aes(shape = package)) +
  scale_y_continuous(trans = "log2")

## Example 2 Cox function--------------------

## Search CRAN for packages matching the query "COX".

## Ask pkgsearch for up to 200 CRAN packages matching "COX".
coxPkg <- pkg_search(
  query = "COX",
  size = 200
)

## Drop packages whose maintainer is listed as "ORPHANED" or whose search
## score is not above 190, keep three columns, and rank by last month's
## downloads (highest first); then preview the top rows.
coxPkgShort <- coxPkg %>%
  filter(score > 190 & maintainer_name != "ORPHANED") %>%
  select(score, package, downloads_last_month) %>%
  arrange(desc(downloads_last_month))
head(coxPkgShort, n = 6)
## # A tibble: 6 x 3
##   score package  downloads_last_month
##   <dbl> <chr>                   <int>
## 1 4722. survival               181762
## 2 1793. glmnet                  78938
## 3 1224. survey                  60810
## 4 1095. gbm                     34251
## 5  802. spatstat                31082
## 6  845. rms                     29276
## Packages to compare for the Cox example.
cox_shortList <- c(
  "survival", "glmnet", "survey",
  "gbm", "spatstat", "rms"
)

## Fetch the download statistics for those packages with dlstats.
cox_downloads <- cran_stats(cox_shortList)

## Downloads over time, one coloured line per package with a distinct point
## shape, drawn on a log2-transformed y axis.
ggplot(
  data = cox_downloads,
  mapping = aes(x = end, y = downloads, group = package, colour = package)
) +
  geom_line() +
  geom_point(mapping = aes(shape = package)) +
  scale_y_continuous(trans = "log2")

## Example 3 excess of mortality------------------------------

## Ask pkgsearch for up to 200 CRAN packages matching "excess of mortality"
## and look at the maintainers of the first hits.
excessPkg <- pkg_search(
  query = "excess of mortality",
  size = 200
)
head(excessPkg[["maintainer_name"]], n = 6)
## [1] "Y. Foucher"         "Yohann Foucher"     "Joonas Miettinen"  
## [4] "Mathieu Fauvernier" "Rob J Hyndman"      "Reinhold Kainhofer"

## Drop packages whose maintainer is listed as "ORPHANED" or whose search
## score is not above 100 (a lower cut-off than in the earlier examples),
## keep three columns, and rank by last month's downloads (highest first).
excessPkgShort <- excessPkg %>%
  filter(score > 100 & maintainer_name != "ORPHANED") %>%
  select(score, package, downloads_last_month) %>%
  arrange(desc(downloads_last_month))
head(excessPkgShort, n = 6)
## # A tibble: 4 x 3
##   score package downloads_last_month
##   <dbl> <chr>                  <int>
## 1  144. popEpi                  2026
## 2  115. survPen                  544
## 3  183. RISCA                    451
## 4  201. ROCt                     201
## Packages to compare for the excess-mortality example.
excess_shortList <- c(
  "popEpi", "survPen",
  "RISCA", "ROCt"
)

## Fetch the download statistics for those packages with dlstats.
excess_downloads <- cran_stats(excess_shortList)

## Downloads over time, one coloured line per package with a distinct point
## shape, drawn on a log2-transformed y axis.
ggplot(
  data = excess_downloads,
  mapping = aes(x = end, y = downloads, group = package, colour = package)
) +
  geom_line() +
  geom_point(mapping = aes(shape = package)) +
  scale_y_continuous(trans = "log2")

## end------------------