For each formula, how often was the bottle that was active on 9/30 downloaded during the month of September?
library(ggplot2)
# Load the bottle download dump. The file is read without a header row, so the
# column names are assigned explicitly afterwards.
df <- read.table("dumped.tab")
colnames(df) <- c("formula", "downloads", "days", "perday", "cellar_any")

# Rank rows from the highest to the lowest per-day download rate, then
# renumber the rows so the row name doubles as the rank.
df <- df[order(-df[["perday"]]), ]
rownames(df) <- seq_len(nrow(df))

# Show the 50 top-ranked rows.
head(df, n = 50)
## formula downloads days perday cellar_any
## 1 readline 86557 23 3763.3 True
## 2 openssl 93299 30 3110.0 False
## 3 xz 22980 8 2872.5 True
## 4 git 27655 10 2765.5 False
## 5 glib 17929 8 2241.1 False
## 6 node 26672 13 2051.7 False
## 7 libpng 57616 30 1920.5 True
## 8 libgpg-error 20865 11 1896.8 True
## 9 bash 5367 3 1789.0 False
## 10 cmake 26983 17 1587.2 True
## 11 mysql 7497 5 1499.4 False
## 12 libksba 16063 11 1460.3 True
## 13 pkg-config 43009 30 1433.6 False
## 14 wget 40717 30 1357.2 False
## 15 sqlite 38143 30 1271.4 True
## 16 freetype 34047 30 1134.9 True
## 17 gettext 33445 30 1114.8 False
## 18 autoconf 33310 30 1110.3 False
## 19 go 5456 5 1091.2 False
## 20 libtool 32099 30 1070.0 False
## 21 youtube-dl 2126 2 1063.0 True
## 22 redis 7312 7 1044.6 False
## 23 gobject-introspection 8111 8 1013.9 False
## 24 jpeg 30054 30 1001.8 True
## 25 gdbm 29192 30 973.1 True
## 26 ossp-uuid 18431 19 970.1 True
## 27 pango 7078 8 884.8 False
## 28 automake 25882 30 862.7 False
## 29 imagemagick 25153 30 838.4 False
## 30 python 25090 30 836.3 False
## 31 pcre 24902 30 830.1 True
## 32 libyaml 24727 30 824.2 True
## 33 atk 6392 8 799.0 False
## 34 ansible 774 1 774.0 False
## 35 libtasn1 10946 15 729.7 True
## 36 ghostscript 2864 4 716.0 False
## 37 icu4c 20908 30 696.9 False
## 38 scons 6761 10 676.1 True
## 39 libtiff 18953 30 631.8 True
## 40 mongodb 18549 30 618.3 False
## 41 mercurial 17286 28 617.4 True
## 42 nginx 2463 4 615.8 False
## 43 gmp 18388 30 612.9 True
## 44 boost 6067 10 606.7 True
## 45 postgresql 17207 30 573.6 False
## 46 ruby 5052 9 561.3 False
## 47 libffi 16291 30 543.0 True
## 48 curl 7055 13 542.7 True
## 49 fontconfig 14463 30 482.1 False
## 50 x264 14010 30 467.0 True
# Five-number summary (plus mean) of the per-day download rate.
summary(df[["perday"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 2 5 94 35 3760
# Empirical cumulative distribution of the per-day download rate.
ggplot(data = df, mapping = aes(x = perday)) +
  stat_ecdf() +
  labs(
    x = "Downloads",
    y = "Quantile",
    title = "Cumulative distribution of daily download count by bottle"
  ) +
  theme_bw()

# Kernel density estimate of the same variable.
ggplot(data = df, mapping = aes(x = perday)) +
  geom_density() +
  labs(
    x = "Downloads",
    y = "Density",
    title = "Bottle daily downloads density function"
  ) +
  theme_bw()
library(jsonlite)
library(magrittr)
# Per-country download counts for OpenSSL, read from a JSON dump.
obj <- stream_in(file("data/openssl.json"))

# Only the first record's `countries` entry is used.
# NOTE(review): presumably a two-column (country, count) table -- confirm
# against the dump.
country.data <- as.data.frame(obj$countries[[1]])
names(country.data) <- c("Country", "Downloads")

# Reverse the level order so that, after coord_flip(), the first row of the
# data ends up at the top of the chart.
country.data$Country <- factor(country.data$Country, rev(country.data$Country))

# Convert the counts to numbers by going through as.character(). This is
# correct whether the column arrived as a factor (strings were converted to
# factors by default before R 4.0) or as plain character/numeric values. The
# earlier `as.numeric(levels(x))[x]` form only worked for factors and yielded
# all-NA for anything else, which left the chart empty.
country.data$Downloads <- as.numeric(as.character(country.data$Downloads))

# Horizontal bar chart of the first ten rows.
country.data %>%
  head(10) %>%
  ggplot(aes(Country, Downloads)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  theme_bw() +
  ggtitle("OpenSSL downloads by country, Sep 2014")