This is a Markdown document explaining the dplyr package. The dplyr package provides many useful functions for manipulating data.
dplyr is implemented in C++, so it is faster than the plyr package.

library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(RCurl)
## Loading required package: bitops
# Point RCurl at the CA certificate file shipped inside the RCurl package so
# the HTTPS request below can be verified.
options(
  RCurlOptions = list(
    cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl")
  )
)
# Download the CSV file as a single character string.
url <- getURL("https://dl.dropboxusercontent.com/u/54341374/data/2014-07-08.csv")
# Parse the downloaded text. Passing it through `text =` lets read.csv() open
# and close its own connection, rather than leaving an anonymous
# textConnection() open after the call.
mydf <- read.csv(text = url)
# Report the number of rows and columns of the raw data frame.
dim(mydf)
## [1] 225468     11
# Preview the first rows of the raw data frame.
head(mydf)
##   X       date     time   size r_version r_arch      r_os      package
## 1 1 2014-07-08 00:54:41  80589     3.1.0 x86_64   mingw32    htmltools
## 2 2 2014-07-08 00:59:53 321767     3.1.0 x86_64   mingw32      tseries
## 3 3 2014-07-08 00:47:13 748063     3.1.0 x86_64 linux-gnu        party
## 4 4 2014-07-08 00:48:05 606104     3.1.0 x86_64 linux-gnu        Hmisc
## 5 5 2014-07-08 00:46:50  79825     3.0.2 x86_64 linux-gnu       digest
## 6 6 2014-07-08 00:48:04  77681     3.1.0 x86_64 linux-gnu randomForest
##   version country ip_id
## 1   0.2.4      US     1
## 2 0.10-32      US     2
## 3  1.0-15      US     3
## 4  3.14-4      US     3
## 5   0.6.4      CA     4
## 6   4.6-7      US     3
# Wrap the data frame with tbl_df(); printing the result shows only the first
# ten rows and lists the columns that do not fit on screen.
# NOTE(review): tbl_df() is deprecated in recent dplyr releases in favour of
# tibble::as_tibble() -- confirm against the installed dplyr version.
cran <- tbl_df(mydf)
cran
## Source: local data frame [225,468 x 11]
## 
##     X       date     time    size r_version r_arch      r_os      package
## 1   1 2014-07-08 00:54:41   80589     3.1.0 x86_64   mingw32    htmltools
## 2   2 2014-07-08 00:59:53  321767     3.1.0 x86_64   mingw32      tseries
## 3   3 2014-07-08 00:47:13  748063     3.1.0 x86_64 linux-gnu        party
## 4   4 2014-07-08 00:48:05  606104     3.1.0 x86_64 linux-gnu        Hmisc
## 5   5 2014-07-08 00:46:50   79825     3.0.2 x86_64 linux-gnu       digest
## 6   6 2014-07-08 00:48:04   77681     3.1.0 x86_64 linux-gnu randomForest
## 7   7 2014-07-08 00:48:35  393754     3.1.0 x86_64 linux-gnu         plyr
## 8   8 2014-07-08 00:47:30   28216     3.0.2 x86_64 linux-gnu      whisker
## 9   9 2014-07-08 00:54:58    5928        NA     NA        NA         Rcpp
## 10 10 2014-07-08 00:15:35 2206029     3.0.2 x86_64 linux-gnu     hflights
## .. ..        ...      ...     ...       ...    ...       ...          ...
## Variables not shown: version (fctr), country (fctr), ip_id (int)
# Drop the raw data frame; the examples below work on `cran` instead.
rm(mydf)
# select(): keep only the named columns, in the order given.
select(cran, ip_id, package, country)
## Source: local data frame [225,468 x 3]
## 
##    ip_id      package country
## 1      1    htmltools      US
## 2      2      tseries      US
## 3      3        party      US
## 4      3        Hmisc      US
## 5      4       digest      CA
## 6      3 randomForest      US
## 7      3         plyr      US
## 8      5      whisker      US
## 9      6         Rcpp      CN
## 10     7     hflights      US
## ..   ...          ...     ...
# select() with `:` keeps the contiguous run of columns from r_arch to country.
select(cran, r_arch:country)
## Source: local data frame [225,468 x 5]
## 
##    r_arch      r_os      package version country
## 1  x86_64   mingw32    htmltools   0.2.4      US
## 2  x86_64   mingw32      tseries 0.10-32      US
## 3  x86_64 linux-gnu        party  1.0-15      US
## 4  x86_64 linux-gnu        Hmisc  3.14-4      US
## 5  x86_64 linux-gnu       digest   0.6.4      CA
## 6  x86_64 linux-gnu randomForest   4.6-7      US
## 7  x86_64 linux-gnu         plyr   1.8.1      US
## 8  x86_64 linux-gnu      whisker   0.3-2      US
## 9      NA        NA         Rcpp  0.10.4      CN
## 10 x86_64 linux-gnu     hflights     0.1      US
## ..    ...       ...          ...     ...     ...
# Columns can also be selected by position: keep the first ten columns.
select(cran, 1:10)
## Source: local data frame [225,468 x 10]
## 
##     X       date     time    size r_version r_arch      r_os      package
## 1   1 2014-07-08 00:54:41   80589     3.1.0 x86_64   mingw32    htmltools
## 2   2 2014-07-08 00:59:53  321767     3.1.0 x86_64   mingw32      tseries
## 3   3 2014-07-08 00:47:13  748063     3.1.0 x86_64 linux-gnu        party
## 4   4 2014-07-08 00:48:05  606104     3.1.0 x86_64 linux-gnu        Hmisc
## 5   5 2014-07-08 00:46:50   79825     3.0.2 x86_64 linux-gnu       digest
## 6   6 2014-07-08 00:48:04   77681     3.1.0 x86_64 linux-gnu randomForest
## 7   7 2014-07-08 00:48:35  393754     3.1.0 x86_64 linux-gnu         plyr
## 8   8 2014-07-08 00:47:30   28216     3.0.2 x86_64 linux-gnu      whisker
## 9   9 2014-07-08 00:54:58    5928        NA     NA        NA         Rcpp
## 10 10 2014-07-08 00:15:35 2206029     3.0.2 x86_64 linux-gnu     hflights
## .. ..        ...      ...     ...       ...    ...       ...          ...
## Variables not shown: version (fctr), country (fctr)
# A leading minus drops a column: keep everything except `time`.
select(cran, -time)
## Source: local data frame [225,468 x 10]
## 
##     X       date    size r_version r_arch      r_os      package version
## 1   1 2014-07-08   80589     3.1.0 x86_64   mingw32    htmltools   0.2.4
## 2   2 2014-07-08  321767     3.1.0 x86_64   mingw32      tseries 0.10-32
## 3   3 2014-07-08  748063     3.1.0 x86_64 linux-gnu        party  1.0-15
## 4   4 2014-07-08  606104     3.1.0 x86_64 linux-gnu        Hmisc  3.14-4
## 5   5 2014-07-08   79825     3.0.2 x86_64 linux-gnu       digest   0.6.4
## 6   6 2014-07-08   77681     3.1.0 x86_64 linux-gnu randomForest   4.6-7
## 7   7 2014-07-08  393754     3.1.0 x86_64 linux-gnu         plyr   1.8.1
## 8   8 2014-07-08   28216     3.0.2 x86_64 linux-gnu      whisker   0.3-2
## 9   9 2014-07-08    5928        NA     NA        NA         Rcpp  0.10.4
## 10 10 2014-07-08 2206029     3.0.2 x86_64 linux-gnu     hflights     0.1
## .. ..        ...     ...       ...    ...       ...          ...     ...
## Variables not shown: country (fctr), ip_id (int)
# filter(): keep only the rows whose package is "swirl".
filter(cran, package == "swirl")
## Source: local data frame [820 x 11]
## 
##       X       date     time   size r_version r_arch         r_os package
## 1    27 2014-07-08 00:17:16 105350     3.0.2 x86_64      mingw32   swirl
## 2   156 2014-07-08 00:22:53  41261     3.1.0 x86_64    linux-gnu   swirl
## 3   358 2014-07-08 00:13:42 105335    2.15.2 x86_64      mingw32   swirl
## 4   593 2014-07-08 00:59:45 105465     3.1.0 x86_64 darwin13.1.0   swirl
## 5   831 2014-07-08 00:55:27 105335     3.0.3 x86_64      mingw32   swirl
## 6   997 2014-07-08 00:33:06  41261     3.1.0 x86_64      mingw32   swirl
## 7  1023 2014-07-08 00:35:36 106393     3.1.0 x86_64      mingw32   swirl
## 8  1144 2014-07-08 00:00:39 106534     3.0.2 x86_64    linux-gnu   swirl
## 9  1402 2014-07-08 00:41:41  41261     3.1.0   i386      mingw32   swirl
## 10 1424 2014-07-08 00:44:49 106393     3.1.0 x86_64    linux-gnu   swirl
## ..  ...        ...      ...    ...       ...    ...          ...     ...
## Variables not shown: version (fctr), country (fctr), ip_id (int)
# Several conditions separated by commas must all hold for a row to be kept.
filter(
  cran,
  r_version == "3.1.1",
  country == "US"
)
## Source: local data frame [1,588 x 11]
## 
##        X       date     time    size r_version r_arch         r_os
## 1   2216 2014-07-08 00:48:58  385112     3.1.1 x86_64 darwin13.1.0
## 2  17332 2014-07-08 03:39:57  197459     3.1.1 x86_64 darwin13.1.0
## 3  17465 2014-07-08 03:25:38   23259     3.1.1 x86_64 darwin13.1.0
## 4  18844 2014-07-08 03:59:17  190594     3.1.1 x86_64 darwin13.1.0
## 5  30182 2014-07-08 04:13:15   77683     3.1.1   i386      mingw32
## 6  30193 2014-07-08 04:06:26 2351969     3.1.1   i386      mingw32
## 7  30195 2014-07-08 04:07:09  299080     3.1.1   i386      mingw32
## 8  30217 2014-07-08 04:32:04  568036     3.1.1   i386      mingw32
## 9  30245 2014-07-08 04:10:41  526858     3.1.1   i386      mingw32
## 10 30354 2014-07-08 04:32:51 1763717     3.1.1   i386      mingw32
## ..   ...        ...      ...     ...       ...    ...          ...
## Variables not shown: package (fctr), version (fctr), country (fctr), ip_id
##   (int)
# r_version is stored as a factor, and `<=` is not defined for factors: the
# direct comparison r_version <= "3.0.2" yields NA for every row (with a
# "'<=' not meaningful for factors" warning), so filter() keeps nothing.
# Compare the values as version numbers instead. Rows with a missing
# r_version are excluded explicitly; strict = FALSE stops numeric_version()
# from raising an error on those missing entries.
filter(
  cran,
  !is.na(r_version),
  numeric_version(as.character(r_version), strict = FALSE) <= "3.0.2",
  country == "IN"
)
## (output not recorded for the corrected call; re-run to regenerate. The
##  previously recorded output was the factor-comparison warning followed by
##  an empty [0 x 11] result.)
# Keep only the rows where r_version is not missing.
filter(cran, !is.na(r_version))
## Source: local data frame [207,205 x 11]
## 
##     X       date     time    size r_version r_arch      r_os      package
## 1   1 2014-07-08 00:54:41   80589     3.1.0 x86_64   mingw32    htmltools
## 2   2 2014-07-08 00:59:53  321767     3.1.0 x86_64   mingw32      tseries
## 3   3 2014-07-08 00:47:13  748063     3.1.0 x86_64 linux-gnu        party
## 4   4 2014-07-08 00:48:05  606104     3.1.0 x86_64 linux-gnu        Hmisc
## 5   5 2014-07-08 00:46:50   79825     3.0.2 x86_64 linux-gnu       digest
## 6   6 2014-07-08 00:48:04   77681     3.1.0 x86_64 linux-gnu randomForest
## 7   7 2014-07-08 00:48:35  393754     3.1.0 x86_64 linux-gnu         plyr
## 8   8 2014-07-08 00:47:30   28216     3.0.2 x86_64 linux-gnu      whisker
## 9  10 2014-07-08 00:15:35 2206029     3.0.2 x86_64 linux-gnu     hflights
## 10 11 2014-07-08 00:15:25  526858     3.0.2 x86_64 linux-gnu         LPCM
## .. ..        ...      ...     ...       ...    ...       ...          ...
## Variables not shown: version (fctr), country (fctr), ip_id (int)
# Narrow the table to the columns from size through ip_id for the sorting
# examples.
cran2 <- select(cran, size:ip_id)
# arrange(): sort the rows by ip_id in ascending order.
arrange(cran2, ip_id)
## Source: local data frame [225,468 x 8]
## 
##      size r_version r_arch         r_os     package version country ip_id
## 1   80589     3.1.0 x86_64      mingw32   htmltools   0.2.4      US     1
## 2  180562     3.0.2 x86_64      mingw32        yaml  2.1.13      US     1
## 3  190120     3.1.0   i386      mingw32       babel   0.2-6      US     1
## 4  321767     3.1.0 x86_64      mingw32     tseries 0.10-32      US     2
## 5   52281     3.0.3 x86_64 darwin10.8.0    quadprog   1.5-5      US     2
## 6  876702     3.1.0 x86_64    linux-gnu         zoo  1.7-11      US     2
## 7  321764     3.0.2 x86_64    linux-gnu     tseries 0.10-32      US     2
## 8  876702     3.1.0 x86_64    linux-gnu         zoo  1.7-11      US     2
## 9  321768     3.1.0 x86_64      mingw32     tseries 0.10-32      US     2
## 10 784093     3.1.0 x86_64    linux-gnu strucchange   1.5-0      US     2
## ..    ...       ...    ...          ...         ...     ...     ...   ...
# Wrap a column in desc() to sort it in descending order.
arrange(cran2, desc(ip_id))
## Source: local data frame [225,468 x 8]
## 
##       size r_version r_arch         r_os      package version country
## 1     5933        NA     NA           NA          CPE   1.4.2      CN
## 2   569241     3.1.0 x86_64      mingw32 multcompView   0.1-5      US
## 3   228444     3.1.0 x86_64      mingw32        tourr   0.5.3      NZ
## 4   308962     3.1.0 x86_64 darwin13.1.0          ctv   0.7-9      CN
## 5   950964     3.0.3   i386      mingw32        knitr     1.6      CA
## 6    80185     3.0.3   i386      mingw32    htmltools   0.2.4      CA
## 7  1431750     3.0.3   i386      mingw32        shiny  0.10.0      CA
## 8  2189695     3.1.0 x86_64      mingw32       RMySQL   0.9-3      US
## 9  4818024     3.1.0   i386      mingw32       igraph   0.7.1      US
## 10  197495     3.1.0 x86_64      mingw32         coda  0.16-1      US
## ..     ...       ...    ...          ...          ...     ...     ...
## Variables not shown: ip_id (int)
# Sort by package first; ip_id breaks ties within the same package.
arrange(cran2, package, ip_id)
## Source: local data frame [225,468 x 8]
## 
##     size r_version r_arch         r_os package version country ip_id
## 1  71677     3.0.3 x86_64 darwin10.8.0      A3   0.9.2      CN  1003
## 2  71672     3.1.0 x86_64    linux-gnu      A3   0.9.2      US  1015
## 3  71677     3.1.0 x86_64      mingw32      A3   0.9.2      IN  1054
## 4  70438     3.0.1 x86_64 darwin10.8.0      A3   0.9.2      CN  1513
## 5  71677        NA     NA           NA      A3   0.9.2      BR  1526
## 6  71892     3.0.2 x86_64    linux-gnu      A3   0.9.2      IN  1542
## 7  71677     3.1.0 x86_64    linux-gnu      A3   0.9.2      ZA  2925
## 8  71672     3.1.0 x86_64      mingw32      A3   0.9.2      IL  3889
## 9  71677     3.0.3 x86_64      mingw32      A3   0.9.2      DE  3917
## 10 71672     3.1.0 x86_64      mingw32      A3   0.9.2      US  4219
## ..   ...       ...    ...          ...     ...     ...     ...   ...
# Sort by country, then by r_version in descending order, then by ip_id.
# NOTE(review): r_version is a factor here, so desc() presumably orders it by
# factor level rather than as a version number -- confirm this is intended.
arrange(
  cran2,
  country,
  desc(r_version),
  ip_id
)
## Source: local data frame [225,468 x 8]
## 
##       size r_version r_arch      r_os       package   version country
## 1  1556858     3.1.1   i386   mingw32 RcppArmadillo 0.4.320.0      A1
## 2  1823512     3.1.0 x86_64 linux-gnu          mgcv     1.8-1      A1
## 3    15732     3.1.0   i686 linux-gnu          grnn     0.1.0      A1
## 4  3014840     3.1.0 x86_64   mingw32          Rcpp    0.11.2      A1
## 5   660087     3.1.0   i386   mingw32           xts     0.9-7      A1
## 6   522261     3.1.0   i386   mingw32           FNN       1.1      A1
## 7   522263     3.1.0   i386   mingw32           FNN       1.1      A1
## 8  1676627     3.1.0 x86_64 linux-gnu         rgeos     0.3-5      A1
## 9  2118530     3.1.0 x86_64 linux-gnu     spacetime     1.1-0      A1
## 10 2217180     3.1.0 x86_64   mingw32         gstat    1.0-19      A1
## ..     ...       ...    ...       ...           ...       ...     ...
## Variables not shown: ip_id (int)
# Keep three columns for the mutate() examples.
cran3 <- select(cran, ip_id, package, size)
# mutate(): add a size_kb column computed as size divided by 2^10.
mutate(cran3, size_kb = size / 2^10)
## Source: local data frame [225,468 x 4]
## 
##    ip_id      package    size     size_kb
## 1      1    htmltools   80589   78.700195
## 2      2      tseries  321767  314.225586
## 3      3        party  748063  730.530273
## 4      3        Hmisc  606104  591.898438
## 5      4       digest   79825   77.954102
## 6      3 randomForest   77681   75.860352
## 7      3         plyr  393754  384.525391
## 8      5      whisker   28216   27.554688
## 9      6         Rcpp    5928    5.789062
## 10     7     hflights 2206029 2154.325195
## ..   ...          ...     ...         ...
# A column created in mutate() can be used later in the same call:
# size_gb is derived from the freshly created size_mb.
mutate(
  cran3,
  size_mb = size / 2^20,
  size_gb = size_mb / 2^10
)
## Source: local data frame [225,468 x 5]
## 
##    ip_id      package    size     size_mb      size_gb
## 1      1    htmltools   80589 0.076855659 7.505435e-05
## 2      2      tseries  321767 0.306860924 2.996689e-04
## 3      3        party  748063 0.713408470 6.966880e-04
## 4      3        Hmisc  606104 0.578025818 5.644783e-04
## 5      4       digest   79825 0.076127052 7.434282e-05
## 6      3 randomForest   77681 0.074082375 7.234607e-05
## 7      3         plyr  393754 0.375513077 3.667120e-04
## 8      5      whisker   28216 0.026908875 2.627820e-05
## 9      6         Rcpp    5928 0.005653381 5.520880e-06
## 10     7     hflights 2206029 2.103833199 2.054525e-03
## ..   ...          ...     ...         ...          ...
# Add a correct_size column equal to size plus 1000.
mutate(cran3, correct_size = size + 1000)
## Source: local data frame [225,468 x 4]
## 
##    ip_id      package    size correct_size
## 1      1    htmltools   80589        81589
## 2      2      tseries  321767       322767
## 3      3        party  748063       749063
## 4      3        Hmisc  606104       607104
## 5      4       digest   79825        80825
## 6      3 randomForest   77681        78681
## 7      3         plyr  393754       394754
## 8      5      whisker   28216        29216
## 9      6         Rcpp    5928         6928
## 10     7     hflights 2206029      2207029
## ..   ...          ...     ...          ...
# summarize(): collapse the table to one row holding the mean of size.
summarize(cran, avg_bytes = mean(size))
## Source: local data frame [1 x 1]
## 
##   avg_bytes
## 1  844086.5