How often was the bottle that was active on 9/30 downloaded in the month of September?

Top bottles

library(ggplot2)
# Load the per-bottle download dump and attach column names in one step.
df <- setNames(
  read.table("dumped.tab"),
  c("formula", "downloads", "days", "perday", "cellar_any")
)

# Rank bottles from the highest to the lowest `perday` value; negating the
# sort key turns order()'s ascending result into a descending one.
df <- df[order(-df[["perday"]]), ]

# Renumber the rows so the printed index doubles as the rank.
row.names(df) <- seq_len(nrow(df))

# Show the 50 top-ranked bottles.
head(df, 50)
##                  formula downloads days perday cellar_any
## 1               readline     86557   23 3763.3       True
## 2                openssl     93299   30 3110.0      False
## 3                     xz     22980    8 2872.5       True
## 4                    git     27655   10 2765.5      False
## 5                   glib     17929    8 2241.1      False
## 6                   node     26672   13 2051.7      False
## 7                 libpng     57616   30 1920.5       True
## 8           libgpg-error     20865   11 1896.8       True
## 9                   bash      5367    3 1789.0      False
## 10                 cmake     26983   17 1587.2       True
## 11                 mysql      7497    5 1499.4      False
## 12               libksba     16063   11 1460.3       True
## 13            pkg-config     43009   30 1433.6      False
## 14                  wget     40717   30 1357.2      False
## 15                sqlite     38143   30 1271.4       True
## 16              freetype     34047   30 1134.9       True
## 17               gettext     33445   30 1114.8      False
## 18              autoconf     33310   30 1110.3      False
## 19                    go      5456    5 1091.2      False
## 20               libtool     32099   30 1070.0      False
## 21            youtube-dl      2126    2 1063.0       True
## 22                 redis      7312    7 1044.6      False
## 23 gobject-introspection      8111    8 1013.9      False
## 24                  jpeg     30054   30 1001.8       True
## 25                  gdbm     29192   30  973.1       True
## 26             ossp-uuid     18431   19  970.1       True
## 27                 pango      7078    8  884.8      False
## 28              automake     25882   30  862.7      False
## 29           imagemagick     25153   30  838.4      False
## 30                python     25090   30  836.3      False
## 31                  pcre     24902   30  830.1       True
## 32               libyaml     24727   30  824.2       True
## 33                   atk      6392    8  799.0      False
## 34               ansible       774    1  774.0      False
## 35              libtasn1     10946   15  729.7       True
## 36           ghostscript      2864    4  716.0      False
## 37                 icu4c     20908   30  696.9      False
## 38                 scons      6761   10  676.1       True
## 39               libtiff     18953   30  631.8       True
## 40               mongodb     18549   30  618.3      False
## 41             mercurial     17286   28  617.4       True
## 42                 nginx      2463    4  615.8      False
## 43                   gmp     18388   30  612.9       True
## 44                 boost      6067   10  606.7       True
## 45            postgresql     17207   30  573.6      False
## 46                  ruby      5052    9  561.3      False
## 47                libffi     16291   30  543.0       True
## 48                  curl      7055   13  542.7       True
## 49            fontconfig     14463   30  482.1      False
## 50                  x264     14010   30  467.0       True
# Five-number summary (plus mean) of the `perday` column across all bottles.
summary(df[["perday"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       2       5      94      35    3760
# Empirical cumulative distribution of the `perday` column, one layer per line.
ggplot(df, aes(x = perday)) +
  stat_ecdf() +
  labs(
    title = "Cumulative distribution of daily download count by bottle",
    x = "Downloads",
    y = "Quantile"
  ) +
  theme_bw()

plot of chunk unnamed-chunk-3

# Kernel density estimate of the `perday` column, one layer per line.
ggplot(df, aes(x = perday)) +
  geom_density() +
  labs(
    title = "Bottle daily downloads density function",
    x = "Downloads",
    y = "Density"
  ) +
  theme_bw()

plot of chunk unnamed-chunk-4

library(jsonlite)
library(magrittr)
# Read the OpenSSL download record and chart its first ten country rows.
# stream_in() parses the file as newline-delimited JSON; the `countries`
# entry of the first record is turned into a two-column data frame.
# NOTE(review): assumes each row is a (country, download count) pair and that
# rows are already sorted by count -- confirm against the dump format.
obj <- stream_in(file("data/openssl.json"))
country.data <- as.data.frame(obj$countries[[1]])
names(country.data) <- c("Country", "Downloads")

# Reverse the level order so that, once the axes are flipped, the first row
# of the table is drawn at the top of the chart.
country.data$Country <- factor(
  country.data$Country,
  levels = rev(country.data$Country)
)

# Convert the counts to numbers by going through as.character(). The previous
# `as.numeric(levels(x))[x]` idiom is only valid when the column is a factor;
# since R 4.0.0 as.data.frame() no longer converts strings to factors by
# default, in which case levels() is NULL and every count silently becomes NA.
# This form gives the same values for factor, character and numeric input.
country.data$Downloads <- as.numeric(as.character(country.data$Downloads))

# Horizontal bar chart of the first ten rows.
country.data %>%
  head(10) %>%
  ggplot(aes(Country, Downloads)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  theme_bw() +
  ggtitle("OpenSSL downloads by country, Sep 2014")

plot of chunk unnamed-chunk-5