CRAN download log for tableone

References

Load packages

library(data.table)
library(dplyr)
library(ggplot2)

Download data files and create a unified data frame

## First day of the download window
startDay <- as.Date("2014-02-01")
## Last day of the window: two days before today, so that a log file that
## does not exist yet is never requested
endDay   <- Sys.Date() - 2
## One entry per calendar day, from startDay to endDay inclusive
allDays  <- seq(from = startDay, to = endDay, by = "day")
## Four-digit year of each day (used as a path component of the URL)
year     <- format(allDays, format = "%Y")

## One URL per day: <base>/<year>/<yyyy-mm-dd>.csv.gz
urls <- paste0("http://cran-logs.rstudio.com/", year, "/",
               format(allDays, format = "%Y-%m-%d"), ".csv.gz")

## Download files
## Each daily log is saved under dirName using the file name taken from its
## URL. Files already on disk are skipped, so re-running only fetches new days.
dirName <- "./cran.log.d/"
## download.file() does not create missing directories; make sure the
## destination exists (no-op, without a warning, if it already does)
dir.create(dirName, showWarnings = FALSE, recursive = TRUE)
for (url in urls) {
    ## Destination file name
    destfile <- paste0(dirName, basename(url))
    ## If not already present download
    if (!file.exists(destfile)) {
        tryCatch(
            ## mode = "wb": the logs are gzip-compressed, so write as binary
            download.file(url = url, destfile = destfile, mode = "wb"),
            error = function(e) {
                ## Remove any partial file; otherwise the file.exists()
                ## check above would skip this day on the next run
                unlink(destfile)
                stop(e)
            })
    }
}

## Read every daily log (one file per day in allDays) into its own data frame
listCsv <- lapply(X = paste0(dirName, allDays, ".csv.gz"), FUN = read.csv)

## Stack the per-day data frames into a single data frame
dfCran <- do.call(what = rbind, args = listCsv)

Data table creation

## Build a data.table from the unified data frame
dtCran <- as.data.table(dfCran)

## Define variable types and add derived columns (all by reference via `:=`);
## first, store date as a Date
dtCran[, `:=`(date = as.Date(date))]
##                 date     time    size r_version r_arch         r_os       package version country ip_id
##        1: 2014-02-01 00:05:57   64091     3.0.2 x86_64      mingw32        gtable   0.1.2      GB     1
##        2: 2014-02-01 00:05:57 1324004     3.0.2 x86_64 darwin10.8.0           plm   1.4-0      US     2
##        3: 2014-02-01 00:33:17  333896     3.0.1 x86_64 darwin10.8.0 scatterplot3d  0.3-34      US     3
##        4: 2014-02-01 00:23:52   17911     3.0.2 x86_64    linux-gnu      evaluate   0.5.1      PT     4
##        5: 2014-02-01 00:37:37 1431501     3.0.2 x86_64    linux-gnu          maps   2.3-6      FR     5
##       ---                                                                                              
## 27025025: 2014-08-04 23:09:00   21439     3.0.3 x86_64 darwin10.8.0  profileModel   0.5-9      US 11583
## 27025026: 2014-08-04 23:08:15 2347297     3.0.2 x86_64    linux-gnu           VIF     1.0      US    32
## 27025027: 2014-08-04 23:20:02 2719220     3.0.2 x86_64    linux-gnu          Rcpp  0.11.2      DK 11778
## 27025028: 2014-08-04 23:20:04   39829     3.0.1 x86_64      mingw32      labeling     0.2      DK 11778
## 27025029: 2014-08-04 23:23:45   13779     3.1.1 x86_64      mingw32         RJDBC   0.2-4      US     2
## Store package as a factor (assigned by reference)
set(dtCran, j = "package", value = factor(dtCran[["package"]]))
##                 date     time    size r_version r_arch         r_os       package version country ip_id
##        1: 2014-02-01 00:05:57   64091     3.0.2 x86_64      mingw32        gtable   0.1.2      GB     1
##        2: 2014-02-01 00:05:57 1324004     3.0.2 x86_64 darwin10.8.0           plm   1.4-0      US     2
##        3: 2014-02-01 00:33:17  333896     3.0.1 x86_64 darwin10.8.0 scatterplot3d  0.3-34      US     3
##        4: 2014-02-01 00:23:52   17911     3.0.2 x86_64    linux-gnu      evaluate   0.5.1      PT     4
##        5: 2014-02-01 00:37:37 1431501     3.0.2 x86_64    linux-gnu          maps   2.3-6      FR     5
##       ---                                                                                              
## 27025025: 2014-08-04 23:09:00   21439     3.0.3 x86_64 darwin10.8.0  profileModel   0.5-9      US 11583
## 27025026: 2014-08-04 23:08:15 2347297     3.0.2 x86_64    linux-gnu           VIF     1.0      US    32
## 27025027: 2014-08-04 23:20:02 2719220     3.0.2 x86_64    linux-gnu          Rcpp  0.11.2      DK 11778
## 27025028: 2014-08-04 23:20:04   39829     3.0.1 x86_64      mingw32      labeling     0.2      DK 11778
## 27025029: 2014-08-04 23:23:45   13779     3.1.1 x86_64      mingw32         RJDBC   0.2-4      US     2
## Store country as a factor (assigned by reference)
dtCran[, `:=`(country = factor(country))]
##                 date     time    size r_version r_arch         r_os       package version country ip_id
##        1: 2014-02-01 00:05:57   64091     3.0.2 x86_64      mingw32        gtable   0.1.2      GB     1
##        2: 2014-02-01 00:05:57 1324004     3.0.2 x86_64 darwin10.8.0           plm   1.4-0      US     2
##        3: 2014-02-01 00:33:17  333896     3.0.1 x86_64 darwin10.8.0 scatterplot3d  0.3-34      US     3
##        4: 2014-02-01 00:23:52   17911     3.0.2 x86_64    linux-gnu      evaluate   0.5.1      PT     4
##        5: 2014-02-01 00:37:37 1431501     3.0.2 x86_64    linux-gnu          maps   2.3-6      FR     5
##       ---                                                                                              
## 27025025: 2014-08-04 23:09:00   21439     3.0.3 x86_64 darwin10.8.0  profileModel   0.5-9      US 11583
## 27025026: 2014-08-04 23:08:15 2347297     3.0.2 x86_64    linux-gnu           VIF     1.0      US    32
## 27025027: 2014-08-04 23:20:02 2719220     3.0.2 x86_64    linux-gnu          Rcpp  0.11.2      DK 11778
## 27025028: 2014-08-04 23:20:04   39829     3.0.1 x86_64      mingw32      labeling     0.2      DK 11778
## 27025029: 2014-08-04 23:23:45   13779     3.1.1 x86_64      mingw32         RJDBC   0.2-4      US     2
## Add the weekday name of each download date
dtCran[, "weekday" := weekdays(date)]
##                 date     time    size r_version r_arch         r_os       package version country ip_id  weekday
##        1: 2014-02-01 00:05:57   64091     3.0.2 x86_64      mingw32        gtable   0.1.2      GB     1 Saturday
##        2: 2014-02-01 00:05:57 1324004     3.0.2 x86_64 darwin10.8.0           plm   1.4-0      US     2 Saturday
##        3: 2014-02-01 00:33:17  333896     3.0.1 x86_64 darwin10.8.0 scatterplot3d  0.3-34      US     3 Saturday
##        4: 2014-02-01 00:23:52   17911     3.0.2 x86_64    linux-gnu      evaluate   0.5.1      PT     4 Saturday
##        5: 2014-02-01 00:37:37 1431501     3.0.2 x86_64    linux-gnu          maps   2.3-6      FR     5 Saturday
##       ---                                                                                                       
## 27025025: 2014-08-04 23:09:00   21439     3.0.3 x86_64 darwin10.8.0  profileModel   0.5-9      US 11583   Monday
## 27025026: 2014-08-04 23:08:15 2347297     3.0.2 x86_64    linux-gnu           VIF     1.0      US    32   Monday
## 27025027: 2014-08-04 23:20:02 2719220     3.0.2 x86_64    linux-gnu          Rcpp  0.11.2      DK 11778   Monday
## 27025028: 2014-08-04 23:20:04   39829     3.0.1 x86_64      mingw32      labeling     0.2      DK 11778   Monday
## 27025029: 2014-08-04 23:23:45   13779     3.1.1 x86_64      mingw32         RJDBC   0.2-4      US     2   Monday
## Add a "year-week" label (%W: week of the year, Monday as first day)
dtCran[, "week" := format(date, format = "%Y-%W")]
##                 date     time    size r_version r_arch         r_os       package version country ip_id  weekday
##        1: 2014-02-01 00:05:57   64091     3.0.2 x86_64      mingw32        gtable   0.1.2      GB     1 Saturday
##        2: 2014-02-01 00:05:57 1324004     3.0.2 x86_64 darwin10.8.0           plm   1.4-0      US     2 Saturday
##        3: 2014-02-01 00:33:17  333896     3.0.1 x86_64 darwin10.8.0 scatterplot3d  0.3-34      US     3 Saturday
##        4: 2014-02-01 00:23:52   17911     3.0.2 x86_64    linux-gnu      evaluate   0.5.1      PT     4 Saturday
##        5: 2014-02-01 00:37:37 1431501     3.0.2 x86_64    linux-gnu          maps   2.3-6      FR     5 Saturday
##       ---                                                                                                       
## 27025025: 2014-08-04 23:09:00   21439     3.0.3 x86_64 darwin10.8.0  profileModel   0.5-9      US 11583   Monday
## 27025026: 2014-08-04 23:08:15 2347297     3.0.2 x86_64    linux-gnu           VIF     1.0      US    32   Monday
## 27025027: 2014-08-04 23:20:02 2719220     3.0.2 x86_64    linux-gnu          Rcpp  0.11.2      DK 11778   Monday
## 27025028: 2014-08-04 23:20:04   39829     3.0.1 x86_64      mingw32      labeling     0.2      DK 11778   Monday
## 27025029: 2014-08-04 23:23:45   13779     3.1.1 x86_64      mingw32         RJDBC   0.2-4      US     2   Monday
##              week
##        1: 2014-04
##        2: 2014-04
##        3: 2014-04
##        4: 2014-04
##        5: 2014-04
##       ---        
## 27025025: 2014-31
## 27025026: 2014-31
## 27025027: 2014-31
## 27025028: 2014-31
## 27025029: 2014-31
## Key (sort) the table by package, then date, week and country
setkeyv(dtCran, cols = c("package", "date", "week", "country"))

Check download history of tableone

## Keep only the rows that record a download of tableone
dtTableOne <- dtCran[package %in% "tableone"]

## Per-column summary of the tableone rows
summary(object = dtTableOne)
##       date                  time           size         r_version       r_arch               r_os    
##  Min.   :2014-02-19   15:20:15:   7   Min.   : 5933   3.1.0  :428   x86_64 :1023   mingw32     :834  
##  1st Qu.:2014-04-02   14:48:38:   7   1st Qu.:74493   3.0.2  :367   i386   : 251   linux-gnu   :268  
##  Median :2014-05-05   14:32:19:   4   Median :75259   3.0.3  :229   i686   :  16   darwin10.8.0:126  
##  Mean   :2014-05-10   16:48:02:   3   Mean   :68680   3.0.1  : 94   i486   :   3   darwin13.1.0: 53  
##  3rd Qu.:2014-06-15   21:31:44:   3   3rd Qu.:76231   3.1.1  : 74   arm    :   1   darwin9.8.0 :  9  
##  Max.   :2014-08-04   22:18:14:   3   Max.   :78223   (Other):102   (Other):   0   (Other)     :  4  
##                       (Other) :1410                   NA's   :143   NA's   : 143   NA's        :143  
##         package        version       country        ip_id         weekday              week          
##  tableone   :1437   0.6.2  :476   US     :470   Min.   :    1   Length:1437        Length:1437       
##  A3         :   0   0.5.0  :303   JP     :186   1st Qu.: 1676   Class :character   Class :character  
##  abc        :   0   0.4.0  :264   DE     : 76   Median : 3725   Mode  :character   Mode  :character  
##  abcdeFBA   :   0   0.3.5  :114   FR     : 66   Mean   : 4554                                        
##  ABCExtremes:   0   0.3.4  :112   CN     : 65   3rd Qu.: 6813                                        
##  ABCoptim   :   0   0.3.3  : 86   NL     : 57   Max.   :18777                                        
##  (Other)    :   0   (Other): 82   (Other):517
## n of download for each day
## `%.%` was dplyr's original chaining operator; it has been deprecated and
## removed, so the steps are chained with `%>%` (re-exported by dplyr) instead
dtTableOneByDay <- dtTableOne %>%
    group_by(date) %>%
    summarize(n = n())
dtTableOneByDay
## Source: local data table [167 x 2]
## 
##          date  n
## 1  2014-02-19 10
## 2  2014-02-20  5
## 3  2014-02-21  9
## 4  2014-02-22  4
## 5  2014-02-23  4
## 6  2014-02-24  6
## 7  2014-02-25  5
## 8  2014-02-26 12
## 9  2014-02-27  4
## 10 2014-02-28  4
## ..        ... ..
## Running total of the daily counts, accumulated in row order
dtTableOneByDay$nCum <- with(dtTableOneByDay, cumsum(n))

Visualize download history of tableone

## Plot
## Line: cumulative downloads (nCum); points: downloads on each day (n).
## geom_line()/geom_point() replace the bare layer(geom = ...) calls, because
## layer() requires explicit stat and position arguments in ggplot2 >= 2.0.0
ggplot(data = dtTableOneByDay,
       mapping = aes(x = date, y = nCum)) +
    geom_line() +
    geom_point(mapping = aes(y = n)) +
    theme_bw() +
    theme(legend.key = element_blank()) +
    labs(title = "Cumulative downloads of tableone package")

plot of chunk unnamed-chunk-6