library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#group_by(Species) %>% summarise(MIN=min(Sepal.Length),
#                                MEAN=mean(Sepal.Length),
#                                MEDIAN=median(Sepal.Length),
#                                MAX=max(Sepal.Length)
#)

iris %>% 
  group_by(Species) %>% 
  summarise_each(funs(min, mean, median, max), 
                 Sepal.Length,Sepal.Width)
## Source: local data frame [3 x 9]
## 
##      Species Sepal.Length_min Sepal.Width_min Sepal.Length_mean
## 1     setosa              4.3             2.3             5.006
## 2 versicolor              4.9             2.0             5.936
## 3  virginica              4.9             2.2             6.588
## Variables not shown: Sepal.Width_mean (dbl), Sepal.Length_median (dbl),
##   Sepal.Width_median (dbl), Sepal.Length_max (dbl), Sepal.Width_max (dbl)
#iris %>% 
#  group_by(Species) %>% 
#  summarise_each(funs(min, mean, median, max), 
#                 start_with("Sepal"))

—————————————————

#Examples
# One function
by_species <- iris %>% group_by(Species)
by_species %>% summarise_each(funs(length))
## Source: local data frame [3 x 5]
## 
##      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     setosa           50          50           50          50
## 2 versicolor           50          50           50          50
## 3  virginica           50          50           50          50
by_species %>% summarise_each(funs(mean))
## Source: local data frame [3 x 5]
## 
##      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     setosa        5.006       3.428        1.462       0.246
## 2 versicolor        5.936       2.770        4.260       1.326
## 3  virginica        6.588       2.974        5.552       2.026
by_species %>% summarise_each(funs(mean), Petal.Width)
## Source: local data frame [3 x 2]
## 
##      Species Petal.Width
## 1     setosa       0.246
## 2 versicolor       1.326
## 3  virginica       2.026
by_species %>% summarise_each(funs(mean), matches("Width"))
## Source: local data frame [3 x 3]
## 
##      Species Sepal.Width Petal.Width
## 1     setosa       3.428       0.246
## 2 versicolor       2.770       1.326
## 3  virginica       2.974       2.026
by_species %>% mutate_each(funs(half = . / 2))
## Source: local data frame [150 x 5]
## Groups: Species
## 
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          2.55        1.75         0.70        0.10  setosa
## 2          2.45        1.50         0.70        0.10  setosa
## 3          2.35        1.60         0.65        0.10  setosa
## 4          2.30        1.55         0.75        0.10  setosa
## 5          2.50        1.80         0.70        0.10  setosa
## 6          2.70        1.95         0.85        0.20  setosa
## 7          2.30        1.70         0.70        0.15  setosa
## 8          2.50        1.70         0.75        0.10  setosa
## 9          2.20        1.45         0.70        0.10  setosa
## 10         2.45        1.55         0.75        0.05  setosa
## ..          ...         ...          ...         ...     ...
by_species %>% mutate_each(funs(min_rank))
## Source: local data frame [150 x 5]
## Groups: Species
## 
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1            29          29           12           6  setosa
## 2            17           3           12           6  setosa
## 3            10          13            5           6  setosa
## 4             6           9           25           6  setosa
## 5            21          35           12           6  setosa
## 6            41          45           45          42  setosa
## 7             6          20           12          35  setosa
## 8            21          20           25           6  setosa
## 9             2           2           12           6  setosa
## 10           17           9           25           1  setosa
## ..          ...         ...          ...         ...     ...
# Two functions
by_species %>% summarise_each(funs(min, max))
## Source: local data frame [3 x 9]
## 
##      Species Sepal.Length_min Sepal.Width_min Petal.Length_min
## 1     setosa              4.3             2.3              1.0
## 2 versicolor              4.9             2.0              3.0
## 3  virginica              4.9             2.2              4.5
## Variables not shown: Petal.Width_min (dbl), Sepal.Length_max (dbl),
##   Sepal.Width_max (dbl), Petal.Length_max (dbl), Petal.Width_max (dbl)
by_species %>% summarise_each(funs(min, max), Petal.Width, Sepal.Width)
## Source: local data frame [3 x 5]
## 
##      Species Petal.Width_min Sepal.Width_min Petal.Width_max
## 1     setosa             0.1             2.3             0.6
## 2 versicolor             1.0             2.0             1.8
## 3  virginica             1.4             2.2             2.5
## Variables not shown: Sepal.Width_max (dbl)
by_species %>% summarise_each(funs(min, max), matches("Width"))
## Source: local data frame [3 x 5]
## 
##      Species Sepal.Width_min Petal.Width_min Sepal.Width_max
## 1     setosa             2.3             0.1             4.4
## 2 versicolor             2.0             1.0             3.4
## 3  virginica             2.2             1.4             3.8
## Variables not shown: Petal.Width_max (dbl)
# Alternative function specification
iris %>% summarise_each(funs(ul = length(unique(.))))
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1           35          23           43          22       3
by_species %>% summarise_each(funs(ul = length(unique(.))))
## Source: local data frame [3 x 5]
## 
##      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     setosa           15          16            9           6
## 2 versicolor           21          14           19           9
## 3  virginica           21          13           20          12
by_species %>% summarise_each(c("min", "max"))
## Source: local data frame [3 x 9]
## 
##      Species Sepal.Length_min Sepal.Width_min Petal.Length_min
## 1     setosa              4.3             2.3              1.0
## 2 versicolor              4.9             2.0              3.0
## 3  virginica              4.9             2.2              4.5
## Variables not shown: Petal.Width_min (dbl), Sepal.Length_max (dbl),
##   Sepal.Width_max (dbl), Petal.Length_max (dbl), Petal.Width_max (dbl)
# Alternative variable specification
summarise_each_(iris, funs(max), names(iris)[-5])
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          7.9         4.4          6.9         2.5
summarise_each_(iris, funs(max), list(quote(-Species)))
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          7.9         4.4          6.9         2.5

—————————————————–

#Examples
iris <- tbl_df(iris) # so it prints a little nicer
select(iris, starts_with("Petal"))
## Source: local data frame [150 x 2]
## 
##    Petal.Length Petal.Width
## 1           1.4         0.2
## 2           1.4         0.2
## 3           1.3         0.2
## 4           1.5         0.2
## 5           1.4         0.2
## 6           1.7         0.4
## 7           1.4         0.3
## 8           1.5         0.2
## 9           1.4         0.2
## 10          1.5         0.1
## ..          ...         ...
select(iris, ends_with("Width"))
## Source: local data frame [150 x 2]
## 
##    Sepal.Width Petal.Width
## 1          3.5         0.2
## 2          3.0         0.2
## 3          3.2         0.2
## 4          3.1         0.2
## 5          3.6         0.2
## 6          3.9         0.4
## 7          3.4         0.3
## 8          3.4         0.2
## 9          2.9         0.2
## 10         3.1         0.1
## ..         ...         ...
select(iris, contains("etal"))
## Source: local data frame [150 x 2]
## 
##    Petal.Length Petal.Width
## 1           1.4         0.2
## 2           1.4         0.2
## 3           1.3         0.2
## 4           1.5         0.2
## 5           1.4         0.2
## 6           1.7         0.4
## 7           1.4         0.3
## 8           1.5         0.2
## 9           1.4         0.2
## 10          1.5         0.1
## ..          ...         ...
select(iris, matches(".t."))
## Source: local data frame [150 x 4]
## 
##    Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1           5.1         3.5          1.4         0.2
## 2           4.9         3.0          1.4         0.2
## 3           4.7         3.2          1.3         0.2
## 4           4.6         3.1          1.5         0.2
## 5           5.0         3.6          1.4         0.2
## 6           5.4         3.9          1.7         0.4
## 7           4.6         3.4          1.4         0.3
## 8           5.0         3.4          1.5         0.2
## 9           4.4         2.9          1.4         0.2
## 10          4.9         3.1          1.5         0.1
## ..          ...         ...          ...         ...
select(iris, Petal.Length, Petal.Width)
## Source: local data frame [150 x 2]
## 
##    Petal.Length Petal.Width
## 1           1.4         0.2
## 2           1.4         0.2
## 3           1.3         0.2
## 4           1.5         0.2
## 5           1.4         0.2
## 6           1.7         0.4
## 7           1.4         0.3
## 8           1.5         0.2
## 9           1.4         0.2
## 10          1.5         0.1
## ..          ...         ...
#Examples
iris <- tbl_df(iris) # so it prints a little nicer
select(iris, starts_with("Petal"))
## Source: local data frame [150 x 2]
## 
##    Petal.Length Petal.Width
## 1           1.4         0.2
## 2           1.4         0.2
## 3           1.3         0.2
## 4           1.5         0.2
## 5           1.4         0.2
## 6           1.7         0.4
## 7           1.4         0.3
## 8           1.5         0.2
## 9           1.4         0.2
## 10          1.5         0.1
## ..          ...         ...
select(iris, ends_with("Width"))
## Source: local data frame [150 x 2]
## 
##    Sepal.Width Petal.Width
## 1          3.5         0.2
## 2          3.0         0.2
## 3          3.2         0.2
## 4          3.1         0.2
## 5          3.6         0.2
## 6          3.9         0.4
## 7          3.4         0.3
## 8          3.4         0.2
## 9          2.9         0.2
## 10         3.1         0.1
## ..         ...         ...
select(iris, contains("etal"))
## Source: local data frame [150 x 2]
## 
##    Petal.Length Petal.Width
## 1           1.4         0.2
## 2           1.4         0.2
## 3           1.3         0.2
## 4           1.5         0.2
## 5           1.4         0.2
## 6           1.7         0.4
## 7           1.4         0.3
## 8           1.5         0.2
## 9           1.4         0.2
## 10          1.5         0.1
## ..          ...         ...
select(iris, matches(".t."))
## Source: local data frame [150 x 4]
## 
##    Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1           5.1         3.5          1.4         0.2
## 2           4.9         3.0          1.4         0.2
## 3           4.7         3.2          1.3         0.2
## 4           4.6         3.1          1.5         0.2
## 5           5.0         3.6          1.4         0.2
## 6           5.4         3.9          1.7         0.4
## 7           4.6         3.4          1.4         0.3
## 8           5.0         3.4          1.5         0.2
## 9           4.4         2.9          1.4         0.2
## 10          4.9         3.1          1.5         0.1
## ..          ...         ...          ...         ...
select(iris, Petal.Length, Petal.Width)
## Source: local data frame [150 x 2]
## 
##    Petal.Length Petal.Width
## 1           1.4         0.2
## 2           1.4         0.2
## 3           1.3         0.2
## 4           1.5         0.2
## 5           1.4         0.2
## 6           1.7         0.4
## 7           1.4         0.3
## 8           1.5         0.2
## 9           1.4         0.2
## 10          1.5         0.1
## ..          ...         ...
#------------------------------------------------
#Examples
library(dplyr)

by_cyl <- mtcars %>% group_by(cyl)
# Sample fixed number per group
sample_n(mtcars, 10)
##                    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Fiat 128          32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Merc 280          19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Honda Civic       30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla    33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Fiat X1-9         27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Duster 360        14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Chrysler Imperial 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Merc 230          22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Valiant           18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Datsun 710        22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
sample_n(mtcars, 50, replace = TRUE)
##                        mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Toyota Corolla        33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Datsun 710            22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Merc 280              19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Hornet Sportabout     18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Dodge Challenger      15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## Duster 360            14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 450SE            16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Camaro Z28            13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Chrysler Imperial     14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Porsche 914-2         26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Dodge Challenger.1    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## Merc 450SL            17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Toyota Corolla.1      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Chrysler Imperial.1   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Lincoln Continental   10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Maserati Bora         15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Merc 230              22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Mazda RX4             21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Volvo 142E            21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
## Honda Civic           30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Chrysler Imperial.2   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Lotus Europa          30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Hornet Sportabout.1   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Merc 240D             24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 280.1            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Datsun 710.1          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Ferrari Dino          19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## AMC Javelin           15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Fiat X1-9             27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Lincoln Continental.1 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial.3   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Dodge Challenger.2    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## Ford Pantera L        15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino.1        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Merc 450SLC           15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Merc 280C             17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SL.1          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Fiat 128              32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Merc 450SE.1          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Mazda RX4.1           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Duster 360.1          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## AMC Javelin.1         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Hornet Sportabout.2   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Dodge Challenger.3    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## Mazda RX4.2           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Toyota Corolla.2      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Mazda RX4.3           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Toyota Corolla.3      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## AMC Javelin.2         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Mazda RX4.4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
sample_n(mtcars, 10, weight = mpg)
##                    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Chrysler Imperial 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Hornet 4 Drive    21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Lotus Europa      30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Honda Civic       30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Merc 230          22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Fiat 128          32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Mazda RX4 Wag     21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Ford Pantera L    15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Fiat X1-9         27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
sample_n(by_cyl, 3)
## Source: local data frame [9 x 11]
## Groups: cyl
## 
##    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 2 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 3 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 4 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## 5 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## 6 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 7 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 8 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## 9 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
sample_n(by_cyl, 10, replace = TRUE)
## Source: local data frame [30 x 11]
## Groups: cyl
## 
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 2  21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
## 3  21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
## 4  30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 5  30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 6  21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
## 7  30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 8  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 9  26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## 10 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 11 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## 12 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## 13 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 14 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 15 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## 16 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## 17 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## 18 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 19 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 20 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 21 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## 22 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## 23 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## 24 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## 25 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## 26 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 27 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 28 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 29 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 30 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
sample_n(by_cyl, 3, weight = mpg / mean(mpg))
## Source: local data frame [9 x 11]
## Groups: cyl
## 
##    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 2 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 3 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 4 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## 5 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 6 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 7 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## 8 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## 9 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
# Sample fixed fraction per group
# Default is to sample all data = randomly resample rows
sample_frac(mtcars)
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
sample_frac(mtcars, 0.1)
##                   mpg cyl disp  hp drat   wt  qsec vs am gear carb
## Ford Pantera L   15.8   8  351 264 4.22 3.17 14.50  0  1    5    4
## Dodge Challenger 15.5   8  318 150 2.76 3.52 16.87  0  0    3    2
## Ferrari Dino     19.7   6  145 175 3.62 2.77 15.50  0  1    5    6
sample_frac(mtcars, 1.5, replace = TRUE)
##                        mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Honda Civic           30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Hornet Sportabout     18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## AMC Javelin           15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Ferrari Dino          19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Toyota Corolla        33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Ferrari Dino.1        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Merc 450SLC           15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Ford Pantera L        15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ford Pantera L.1      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Camaro Z28            13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Chrysler Imperial     14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Ford Pantera L.2      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino.2        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Porsche 914-2         26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Valiant               18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Valiant.1             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## AMC Javelin.1         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Merc 450SL            17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 230              22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 230.1            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Valiant.2             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Lincoln Continental   10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Toyota Corolla.1      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Merc 450SE            16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Datsun 710            22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## AMC Javelin.2         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Merc 230.2            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 230.3            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Porsche 914-2.1       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Fiat X1-9             27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Duster 360            14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Maserati Bora         15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Lotus Europa          30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Datsun 710.1          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Cadillac Fleetwood    10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Merc 450SLC.1         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Merc 280C             17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Ford Pantera L.3      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Lincoln Continental.1 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Merc 280C.1           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 240D             24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Toyota Corona         21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Camaro Z28.1          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Hornet Sportabout.1   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Merc 230.4            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Lincoln Continental.2 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial.1   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Porsche 914-2.2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
sample_frac(mtcars, 0.1, weight = 1 / mpg)
##                mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Merc 240D     24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Toyota Corona 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Ferrari Dino  19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
sample_frac(by_cyl, 0.2)
## Source: local data frame [6 x 11]
## Groups: cyl
## 
##    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 2 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 3 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## 4 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## 5 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 6 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
sample_frac(by_cyl, 1, replace = TRUE)
## Source: local data frame [32 x 11]
## Groups: cyl
## 
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 2  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 3  33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 4  21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 5  26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## 6  30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 7  32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 8  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 9  21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 10 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 11 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 12 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 13 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## 14 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## 15 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## 16 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 17 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## 18 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## 19 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 20 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## 21 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 22 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## 23 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## 24 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 25 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## 26 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## 27 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## 28 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## 29 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## 30 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## 31 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## 32 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3

———————————————

#Examples
df <- expand.grid(x = 1:3, y = 3:1)
df %>% 
  rowwise() %>%
  do(i = seq(.$x, .$y))
## Source: local data frame [9 x 1]
## Groups: <by row>
## 
##          i
## 1 <int[3]>
## 2 <int[2]>
## 3 <int[1]>
## 4 <int[2]>
## 5 <int[1]>
## 6 <int[2]>
## 7 <int[1]>
## 8 <int[2]>
## 9 <int[3]>
.Last.value 
## $help_type
## [1] "html"
#%>% summarise(n = length(i))

———————————————–

#Examples
one <- mtcars[1:10, ]
two <- mtcars[11:32, ]
rbind_list(one, two)
## Source: local data frame [32 x 11]
## 
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## 7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## 8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## 11 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## 12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## 13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## 14 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## 15 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## 16 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## 17 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## 18 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 19 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 20 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 21 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 22 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## 23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## 24 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## 25 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## 26 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 27 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## 28 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 29 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 30 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## 31 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## 32 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
rbind_all(list(one, two))
## Source: local data frame [32 x 11]
## 
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## 7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## 8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## 11 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## 12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## 13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## 14 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## 15 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## 16 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## 17 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## 18 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 19 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 20 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 21 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 22 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## 23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## 24 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## 25 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## 26 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 27 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## 28 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 29 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 30 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## 31 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## 32 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

———————————————

#Examples
mutate(mtcars, displ_l = disp / 61.0237)
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb  displ_l
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4 2.621932
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4 2.621932
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1 1.769804
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1 4.227866
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2 5.899347
## 6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1 3.687092
## 7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4 5.899347
## 8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2 2.403984
## 9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2 2.307300
## 10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4 2.746474
## 11 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4 2.746474
## 12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3 4.519556
## 13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3 4.519556
## 14 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3 4.519556
## 15 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4 7.734700
## 16 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4 7.538055
## 17 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4 7.210313
## 18 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1 1.289663
## 19 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2 1.240502
## 20 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1 1.165121
## 21 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1 1.968088
## 22 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2 5.211090
## 23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2 4.981671
## 24 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4 5.735477
## 25 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2 6.554830
## 26 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1 1.294579
## 27 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2 1.971365
## 28 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2 1.558411
## 29 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4 5.751864
## 30 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6 2.376126
## 31 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8 4.932510
## 32 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2 1.982836
transmute(mtcars, displ_l = disp / 61.0237)
##     displ_l
## 1  2.621932
## 2  2.621932
## 3  1.769804
## 4  4.227866
## 5  5.899347
## 6  3.687092
## 7  5.899347
## 8  2.403984
## 9  2.307300
## 10 2.746474
## 11 2.746474
## 12 4.519556
## 13 4.519556
## 14 4.519556
## 15 7.734700
## 16 7.538055
## 17 7.210313
## 18 1.289663
## 19 1.240502
## 20 1.165121
## 21 1.968088
## 22 5.211090
## 23 4.981671
## 24 5.735477
## 25 6.554830
## 26 1.294579
## 27 1.971365
## 28 1.558411
## 29 5.751864
## 30 2.376126
## 31 4.932510
## 32 1.982836
mutate(mtcars, cyl = NULL)
##     mpg  disp  hp drat    wt  qsec vs am gear carb
## 1  21.0 160.0 110 3.90 2.620 16.46  0  1    4    4
## 2  21.0 160.0 110 3.90 2.875 17.02  0  1    4    4
## 3  22.8 108.0  93 3.85 2.320 18.61  1  1    4    1
## 4  21.4 258.0 110 3.08 3.215 19.44  1  0    3    1
## 5  18.7 360.0 175 3.15 3.440 17.02  0  0    3    2
## 6  18.1 225.0 105 2.76 3.460 20.22  1  0    3    1
## 7  14.3 360.0 245 3.21 3.570 15.84  0  0    3    4
## 8  24.4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 9  22.8 140.8  95 3.92 3.150 22.90  1  0    4    2
## 10 19.2 167.6 123 3.92 3.440 18.30  1  0    4    4
## 11 17.8 167.6 123 3.92 3.440 18.90  1  0    4    4
## 12 16.4 275.8 180 3.07 4.070 17.40  0  0    3    3
## 13 17.3 275.8 180 3.07 3.730 17.60  0  0    3    3
## 14 15.2 275.8 180 3.07 3.780 18.00  0  0    3    3
## 15 10.4 472.0 205 2.93 5.250 17.98  0  0    3    4
## 16 10.4 460.0 215 3.00 5.424 17.82  0  0    3    4
## 17 14.7 440.0 230 3.23 5.345 17.42  0  0    3    4
## 18 32.4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 19 30.4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 20 33.9  71.1  65 4.22 1.835 19.90  1  1    4    1
## 21 21.5 120.1  97 3.70 2.465 20.01  1  0    3    1
## 22 15.5 318.0 150 2.76 3.520 16.87  0  0    3    2
## 23 15.2 304.0 150 3.15 3.435 17.30  0  0    3    2
## 24 13.3 350.0 245 3.73 3.840 15.41  0  0    3    4
## 25 19.2 400.0 175 3.08 3.845 17.05  0  0    3    2
## 26 27.3  79.0  66 4.08 1.935 18.90  1  1    4    1
## 27 26.0 120.3  91 4.43 2.140 16.70  0  1    5    2
## 28 30.4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 29 15.8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 30 19.7 145.0 175 3.62 2.770 15.50  0  1    5    6
## 31 15.0 301.0 335 3.54 3.570 14.60  0  1    5    8
## 32 21.4 121.0 109 4.11 2.780 18.60  1  1    4    2
#---------------------------------------------
#Examples
library(dplyr)

order_by(10:1, cumsum(1:10))
##  [1] 55 54 52 49 45 40 34 27 19 10
x <- 10:1
y <- 1:10
order_by(x, cumsum(y))
##  [1] 55 54 52 49 45 40 34 27 19 10
df <- data.frame(year = 2000:2005, value = (0:5) ^ 2)
scrambled <- df[sample(nrow(df)), ]
wrong <- mutate(scrambled, running = cumsum(value))
arrange(wrong, year)
##   year value running
## 1 2000     0      51
## 2 2001     1       1
## 3 2002     4      55
## 4 2003     9      10
## 5 2004    16      51
## 6 2005    25      35
right <- mutate(scrambled, running = order_by(year, cumsum(value)))
arrange(right, year)
##   year value running
## 1 2000     0       0
## 2 2001     1       1
## 3 2002     4       5
## 4 2003     9      14
## 5 2004    16      30
## 6 2005    25      55
#------------------------------------------
#Examples
if (require("nycflights13")) {
  carriers <- group_by(flights, carrier)
  summarise(carriers, n())
  mutate(carriers, n = n())
  filter(carriers, n() < 100)
}
## Loading required package: nycflights13
## Source: local data frame [32 x 16]
## Groups: carrier
## 
##    year month day dep_time dep_delay arr_time arr_delay carrier tailnum
## 1  2013     1  30     1222        67     1402       107      OO  N978SW
## 2  2013    11   3     1424        -6     1629        -5      OO  N813SK
## 3  2013    11  10     1443        13     1701        27      OO  N813SK
## 4  2013    11  17     1422        -8     1610       -24      OO  N693CA
## 5  2013    11  25     1803         4     2011        -6      OO  N803SK
## 6  2013    11  30     1648         1     1814         3      OO  N746SK
## 7  2013     6  15     1626        -9     1810       -20      OO  N913EV
## 8  2013     6  22     1846       131     2107       157      OO  N427SW
## 9  2013     8  27     1755       -10     1956         3      OO  N789SK
## 10 2013     8  28     2039       154     2213       140      OO  N790SK
## 11 2013     8  29     1832        27     2039        46      OO  N797SK
## 12 2013     8  30     1930        85     2102        69      OO  N762SK
## 13 2013     9   2       NA        NA       NA        NA      OO  N768SK
## 14 2013     9   3     1759        -6     1959         6      OO  N795SK
## 15 2013     9   4     1803        -2     1941       -12      OO  N780SK
## 16 2013     9   5     1758        -7     1938       -15      OO  N789SK
## 17 2013     9   6     1803        -2     1945        -8      OO  N740SK
## 18 2013     9   8     1754       -11     1946        -7      OO  N726SK
## 19 2013     9   9     1757        -8     1951        -2      OO  N780SK
## 20 2013     9  10     1759        -6     1945        -8      OO  N705SK
## 21 2013     9  11       NA        NA       NA        NA      OO  N728SK
## 22 2013     9  12       NA        NA       NA        NA      OO  N789SK
## 23 2013     9  13     1845        40     2041        48      OO  N702SK
## 24 2013     9  15     1756        -9     1929       -24      OO  N778SK
## 25 2013     9  16     1752       -13     1939       -14      OO  N710SK
## 26 2013     9  17     1755       -10     1937       -16      OO  N794SK
## 27 2013     9  18     1754       -11     1927       -26      OO  N701SK
## 28 2013     9  19     1759        -6     1946        -7      OO  N760SK
## 29 2013     9  20     1758        -7     1929       -24      OO  N766SK
## 30 2013     9  22     1759        -6     1945        -8      OO  N772SK
## 31 2013     9  23     1759        -6     1935       -18      OO  N776SK
## 32 2013     9  24     1751       -14     1937       -16      OO  N785SK
## Variables not shown: flight (int), origin (chr), dest (chr), air_time
##   (dbl), distance (dbl), hour (dbl), minute (dbl)
#-----------------------------------------
#Examples
arrange(mtcars, cyl, disp)
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 2  30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 3  32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 4  27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 5  30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 6  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 7  21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 8  26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## 9  21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
## 10 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 11 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 12 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## 13 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 14 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 15 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## 16 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## 17 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## 18 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 19 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## 20 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## 21 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## 22 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## 23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## 24 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## 25 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## 26 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 27 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 28 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## 29 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## 30 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## 31 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## 32 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
arrange(mtcars, desc(disp))
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## 2  10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## 3  14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## 4  19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 6  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## 7  15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 8  13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## 9  15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## 10 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## 11 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## 12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## 13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## 14 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## 15 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 16 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## 17 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## 18 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## 19 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 20 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 21 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 22 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## 23 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 24 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
## 25 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## 26 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 27 22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 28 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 29 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 30 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 31 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 32 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1

————————————————–


#Examples
library(dplyr)

if (require("microbenchmark") && has_lahman()) {
  lahman_local <- lahman_srcs("df", "dt")
  teams <- lapply(lahman_local, function(x) x %>% tbl("Teams"))
  compare_tbls(teams, function(x) x %>% filter(yearID == 2010))
  bench_tbls(teams, function(x) x %>% filter(yearID == 2010))
  # You can also supply arbitrary additional arguments to bench_tbls
  # if there are other operations you'd like to compare.
  bench_tbls(teams, function(x) x %>% filter(yearID == 2010),
             base = subset(Lahman::Teams, yearID == 2010))

  # A more complicated example using multiple tables
  setup <- function(src) {
    list(
      src %>% tbl("Batting") %>% filter(stint == 1) %>% select(playerID:H),
      src %>% tbl("Master") %>% select(playerID, birthYear)
    )
  }
  two_tables <- lapply(lahman_local, setup)
  op <- function(tbls) {
    semi_join(tbls[[1]], tbls[[2]], by = "playerID")
  }
  # compare_tbls(two_tables, op)
  bench_tbls(two_tables, op, times = 2)
}

————————————–

#Examples
x <- rnorm(1e2)
x[between(x, -1, 1)]
##  [1] -0.74514504 -0.88052357  0.85416135  0.60696128 -0.86278670
##  [6] -0.48890137 -0.55613603  0.51548160  0.54895192  0.09681148
## [11] -0.55527200 -0.59463210 -0.25493668 -0.59350103  0.47608594
## [16]  0.13219356 -0.09316145 -0.41946417  0.43699721 -0.13482535
## [21]  0.19949309  0.67237610 -0.15521628 -0.07728611 -0.86609228
## [26] -0.71164465 -0.70987854 -0.41268549 -0.13551132 -0.94724260
## [31]  0.50409441 -0.02076468  0.60846707  0.53619117  0.04492938
## [36]  0.62336881 -0.17632775 -0.84539537  0.84669174 -0.11654251
## [41]  0.60434531  0.40685478  0.59458089 -0.63614126  0.54816286
## [46] -0.98417697 -0.85356170  0.31287356  0.57445725 -0.83425817
## [51]  0.10005099  0.65569335 -0.60177045 -0.63651447  0.45174517
## [56] -0.41951984  0.16901617 -0.23651226  0.08232645 -0.49527933
## [61] -0.06836192 -0.05942266  0.60916248 -0.30171271  0.43767429
## [66] -0.50610181  0.35060795
#Examples
build_sql("SELECT * FROM TABLE")
## <SQL> SELECT * FROM TABLE
x <- "TABLE"
build_sql("SELECT * FROM ", x)
## <SQL> SELECT * FROM 'TABLE'
build_sql("SELECT * FROM ", ident(x))
## <SQL> SELECT * FROM "TABLE"
build_sql("SELECT * FROM ", sql(x))
## <SQL> SELECT * FROM TABLE
# http://xkcd.com/327/
name <- "Robert'); DROP TABLE Students;--"
build_sql("INSERT INTO Students (Name) VALUES (", name, ")")
## <SQL> INSERT INTO Students (Name) VALUES ('Robert''); DROP TABLE Students;--')
#Examples
# If you're performing many operations you can either do step by step
if (require("nycflights13")) {
  a1 <- group_by(flights, year, month, day)
  a2 <- select(a1, arr_delay, dep_delay)
  a3 <- summarise(a2,
                  arr = mean(arr_delay, na.rm = TRUE),
                  dep = mean(dep_delay, na.rm = TRUE))
  a4 <- filter(a3, arr > 30 | dep > 30)
  # If you don't want to save the intermediate results, you need to
  # wrap the functions:
  filter(
    summarise(
      select(
        group_by(flights, year, month, day),
        arr_delay, dep_delay
      ),
      arr = mean(arr_delay, na.rm = TRUE),
      dep = mean(dep_delay, na.rm = TRUE)
    ),
    arr > 30 | dep > 30
  )
  # This is difficult to read because the order of the operations is from
  # inside to out, and the arguments are a long way away from the function.
  # Alternatively you can use chain or %>% to sequence the operations
  # linearly:
  flights %>%
    group_by(year, month, day) %>%
    select(arr_delay, dep_delay) %>%
    summarise(
      arr = mean(arr_delay, na.rm = TRUE),
      dep = mean(dep_delay, na.rm = TRUE)
    ) %>%
    filter(arr > 30 | dep > 30)
}
## Source: local data frame [49 x 5]
## Groups: year, month
## 
##    year month day      arr      dep
## 1  2013     1  16 34.24736 24.61287
## 2  2013     1  31 32.60285 28.65836
## 3  2013     2  11 36.29009 39.07360
## 4  2013     2  27 31.25249 37.76327
## 5  2013     3   8 85.86216 83.53692
## 6  2013     3  18 41.29189 30.11796
## 7  2013     4  10 38.41231 33.02368
## 8  2013     4  12 36.04814 34.83843
## 9  2013     4  18 36.02848 34.91536
## 10 2013     4  19 47.91170 46.12783
## 11 2013     4  22 37.81217 30.64255
## 12 2013     4  25 33.68125 23.33956
## 13 2013     5   8 39.60918 43.21778
## 14 2013     5  23 61.97090 51.14472
## 15 2013     5  24 24.25742 30.34072
## 16 2013     6   2 26.07552 34.01337
## 17 2013     6  10 28.02229 30.61945
## 18 2013     6  13 63.75369 45.79083
## 19 2013     6  18 37.64803 35.95077
## 20 2013     6  24 51.17681 47.15742
## 21 2013     6  25 41.51368 43.06303
## 22 2013     6  26 27.31741 30.61175
## 23 2013     6  27 44.78330 40.89123
## 24 2013     6  28 44.97685 48.82778
## 25 2013     6  30 43.51028 44.18818
## 26 2013     7   1 58.28050 56.23383
## 27 2013     7   7 40.30638 36.61745
## 28 2013     7   8 29.64885 37.29665
## 29 2013     7   9 31.33437 30.71150
## 30 2013     7  10 59.62648 52.86070
## 31 2013     7  22 62.76340 46.66705
## 32 2013     7  23 44.95982 44.74169
## 33 2013     7  28 49.83178 37.71016
## 34 2013     8   1 35.98926 34.57403
## 35 2013     8   8 55.48116 43.34995
## 36 2013     8   9 43.31364 34.69190
## 37 2013     8  22 29.97674 33.60042
## 38 2013     8  28 35.20307 40.52689
## 39 2013     9   2 45.51843 53.02955
## 40 2013     9  12 58.91242 49.95875
## 41 2013    10   7 39.01726 39.14671
## 42 2013    10  11 18.92299 31.23184
## 43 2013    12   5 51.66625 52.32799
## 44 2013    12   8 36.91180 21.51534
## 45 2013    12   9 42.57556 34.80022
## 46 2013    12  10 44.50880 26.46549
## 47 2013    12  14 46.39750 28.36155
## 48 2013    12  17 55.87186 40.70560
## 49 2013    12  23 32.22604 32.25415
#Examples
if (require("RSQLite") && has_lahman("sqlite")) {
  batting <- tbl(lahman_sqlite(), "Batting")
  remote <- select(filter(batting, yearID > 2010 && stint == 1), playerID:H)
  remote2 <- collapse(remote)
  cached <- compute(remote)
  local <- collect(remote)
}
## Loading required package: RSQLite
## Loading required package: DBI
## Creating table: AllstarFull
## Creating table: Appearances
## Creating table: AwardsManagers
## Creating table: AwardsPlayers
## Creating table: AwardsShareManagers
## Creating table: AwardsSharePlayers
## Creating table: Batting
## Creating table: BattingPost
## Creating table: Fielding
## Creating table: FieldingOF
## Creating table: FieldingPost
## Creating table: HallOfFame
## Creating table: LahmanData
## Creating table: Managers
## Creating table: ManagersHalf
## Creating table: Master
## Creating table: Pitching
## Creating table: PitchingPost
## Creating table: Salaries
## Creating table: Schools
## Creating table: SchoolsPlayers
## Creating table: SeriesPost
## Creating table: Teams
## Creating table: TeamsFranchises
## Creating table: TeamsHalf
#Examples
if (require("RSQLite") && require("RSQLite.extfuns")) {
  db <- src_sqlite(tempfile(), create = TRUE)
  iris2 <- copy_to(db, iris)
  mtcars$model <- rownames(mtcars)
  mtcars2 <- copy_to(db, mtcars, indexes = list("model"))
  explain(filter(mtcars2, model == "Hornet 4 Drive"))
  # Note that tables are temporary by default, so they're not
  # visible from other connections to the same database.
  src_tbls(db)
  db2 <- src_sqlite(db$path)
  src_tbls(db2)
}
## Loading required package: RSQLite.extfuns
## <SQL>
## SELECT "mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear", "carb", "model"
## FROM "mtcars"
## WHERE "model" = 'Hornet 4 Drive'
## 
## 
## <PLAN>
##   selectid order from
## 1        0     0    0
##                                                   detail
## 1 SEARCH TABLE mtcars USING INDEX mtcars_model (model=?)
## character(0)
#Examples
a <- 1:5
data_frame(a, b = a * 2)
## Source: local data frame [5 x 2]
## 
##   a  b
## 1 1  2
## 2 2  4
## 3 3  6
## 4 4  8
## 5 5 10
data_frame(a, b = a * 2, c = 1)
## Source: local data frame [5 x 3]
## 
##   a  b c
## 1 1  2 1
## 2 2  4 1
## 3 3  6 1
## 4 4  8 1
## 5 5 10 1
data_frame(x = runif(10), y = x * 2)
## Source: local data frame [10 x 2]
## 
##             x         y
## 1  0.41226008 0.8245202
## 2  0.55327525 1.1065505
## 3  0.23731705 0.4746341
## 4  0.58561299 1.1712260
## 5  0.08340908 0.1668182
## 6  0.74077732 1.4815546
## 7  0.56287209 1.1257442
## 8  0.97710989 1.9542198
## 9  0.18900110 0.3780022
## 10 0.11197740 0.2239548
# data_frame never coerces its inputs
str(data_frame(letters))
## Classes 'tbl_df', 'tbl' and 'data.frame':    26 obs. of  1 variable:
##  $ letters: chr  "a" "b" "c" "d" ...
str(data_frame(x = diag(5)))
## Classes 'tbl_df', 'tbl' and 'data.frame':    5 obs. of  1 variable:
##  $ x: num [1:5, 1:5] 1 0 0 0 0 0 1 0 0 0 ...
# or munges column names
data_frame(`a + b` = 1:5)
## Source: local data frame [5 x 1]
## 
##   a + b
## 1     1
## 2     2
## 3     3
## 4     4
## 5     5
#Examples
desc(1:10)
##  [1]  -1  -2  -3  -4  -5  -6  -7  -8  -9 -10
desc(factor(letters))
##  [1]  -1  -2  -3  -4  -5  -6  -7  -8  -9 -10 -11 -12 -13 -14 -15 -16 -17
## [18] -18 -19 -20 -21 -22 -23 -24 -25 -26
first_day <- seq(as.Date("1910/1/1"), as.Date("1920/1/1"), 
                 "years")
desc(first_day)
##  [1] 21915 21550 21185 20819 20454 20089 19724 19358 18993 18628 18263
#Examples
df <- data.frame(
  x = sample(10, 100, rep = TRUE),
  y = sample(10, 100, rep = TRUE)
)
nrow(df)
## [1] 100
nrow(distinct(df))
## [1] 65
distinct(df, x)
##     x y
## 1  10 5
## 2   8 4
## 3   5 5
## 4   7 8
## 5   2 8
## 6   9 3
## 7   4 2
## 8   1 7
## 9   3 6
## 10  6 3
distinct(df, y)
##     x  y
## 1  10  5
## 2   8  4
## 3  10  9
## 4   7  8
## 5   7 10
## 6   7  7
## 7   9  3
## 8   4  2
## 9   9  1
## 10  3  6
# You can also use distinct on computed variables
distinct(df, diff = abs(x - y))
##     x  y diff
## 1  10  5    5
## 2   8  4    4
## 3  10  9    1
## 4   5  5    0
## 5   7 10    3
## 6   2  8    6
## 7   4  2    2
## 8   9  1    8
## 9  10  1    9
## 10  8  1    7

——————————————-

#Examples
by_cyl <- group_by(mtcars, cyl)
do(by_cyl, head(., 2))
## Source: local data frame [6 x 12]
## Groups: cyl
## 
##    mpg cyl  disp  hp drat    wt  qsec vs am gear carb             model
## 1 22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1        Datsun 710
## 2 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2         Merc 240D
## 3 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4         Mazda RX4
## 4 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4     Mazda RX4 Wag
## 5 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2 Hornet Sportabout
## 6 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4        Duster 360
models <- by_cyl %>% do(mod = lm(mpg ~ disp, data = .))
models
## Source: local data frame [3 x 2]
## Groups: <by row>
## 
##   cyl     mod
## 1   4 <S3:lm>
## 2   6 <S3:lm>
## 3   8 <S3:lm>
summarise(models, rsq = summary(mod)$r.squared)
## Source: local data frame [3 x 1]
## 
##          rsq
## 1 0.64840514
## 2 0.01062604
## 3 0.27015777
models %>% do(data.frame(coef = coef(.$mod)))
## Source: local data frame [6 x 1]
## Groups: <by row>
## 
##           coef
## 1 40.871955322
## 2 -0.135141815
## 3 19.081987419
## 4  0.003605119
## 5 22.032798914
## 6 -0.019634095
models %>% do(data.frame(
  var = names(coef(.$mod)),
  coef(summary(.$mod)))
)
## Source: local data frame [6 x 5]
## Groups: <by row>
## 
##           var     Estimate  Std..Error    t.value     Pr...t..
## 1 (Intercept) 40.871955322 3.589605400 11.3861973 1.202715e-06
## 2        disp -0.135141815 0.033171608 -4.0740206 2.782827e-03
## 3 (Intercept) 19.081987419 2.913992892  6.5483988 1.243968e-03
## 4        disp  0.003605119 0.015557115  0.2317344 8.259297e-01
## 5 (Intercept) 22.032798914 3.345241115  6.5863112 2.588765e-05
## 6        disp -0.019634095 0.009315926 -2.1075838 5.677488e-02
models <- by_cyl %>% do(
  mod_linear = lm(mpg ~ disp, data = .),
  mod_quad = lm(mpg ~ poly(disp, 2), data = .)
)
models
## Source: local data frame [3 x 3]
## Groups: <by row>
## 
##   cyl mod_linear mod_quad
## 1   4    <S3:lm>  <S3:lm>
## 2   6    <S3:lm>  <S3:lm>
## 3   8    <S3:lm>  <S3:lm>
compare <- models %>% do(aov = anova(.$mod_linear, .$mod_quad))
# compare %>% summarise(p.value = aov$`Pr(>F)`)
if (require("nycflights13")) {
  # You can use it to do any arbitrary computation, like fitting a linear
  # model. Let's explore how carrier departure delays vary over the time
  carriers <- group_by(flights, carrier)
  group_size(carriers)
  mods <- do(carriers, mod = lm(arr_delay ~ dep_time, data = .))
  mods %>% do(as.data.frame(coef(.$mod)))
  mods %>% summarise(rsq = summary(mod)$r.squared)
  ## Not run:
  # This longer example shows the progress bar in action
  by_dest <- flights %>% group_by(dest) %>% filter(n() > 100)
  library(mgcv)
  by_dest %>% do(smooth = gam(arr_delay ~ s(dep_time) + month, data = .))
  ## End(Not run)
}
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## This is mgcv 1.8-4. For overview type 'help("mgcv-package")'.
## Source: local data frame [93 x 2]
## Groups: <by row>
## 
##    dest            smooth
## 1   ABQ <S3:gam, glm, lm>
## 2   ACK <S3:gam, glm, lm>
## 3   ALB <S3:gam, glm, lm>
## 4   ATL <S3:gam, glm, lm>
## 5   AUS <S3:gam, glm, lm>
## 6   AVL <S3:gam, glm, lm>
## 7   BDL <S3:gam, glm, lm>
## 8   BGR <S3:gam, glm, lm>
## 9   BHM <S3:gam, glm, lm>
## 10  BNA <S3:gam, glm, lm>
## 11  BOS <S3:gam, glm, lm>
## 12  BQN <S3:gam, glm, lm>
## 13  BTV <S3:gam, glm, lm>
## 14  BUF <S3:gam, glm, lm>
## 15  BUR <S3:gam, glm, lm>
## 16  BWI <S3:gam, glm, lm>
## 17  CAE <S3:gam, glm, lm>
## 18  CAK <S3:gam, glm, lm>
## 19  CHS <S3:gam, glm, lm>
## 20  CLE <S3:gam, glm, lm>
## 21  CLT <S3:gam, glm, lm>
## 22  CMH <S3:gam, glm, lm>
## 23  CRW <S3:gam, glm, lm>
## 24  CVG <S3:gam, glm, lm>
## 25  DAY <S3:gam, glm, lm>
## 26  DCA <S3:gam, glm, lm>
## 27  DEN <S3:gam, glm, lm>
## 28  DFW <S3:gam, glm, lm>
## 29  DSM <S3:gam, glm, lm>
## 30  DTW <S3:gam, glm, lm>
## 31  EGE <S3:gam, glm, lm>
## 32  FLL <S3:gam, glm, lm>
## 33  GRR <S3:gam, glm, lm>
## 34  GSO <S3:gam, glm, lm>
## 35  GSP <S3:gam, glm, lm>
## 36  HNL <S3:gam, glm, lm>
## 37  HOU <S3:gam, glm, lm>
## 38  IAD <S3:gam, glm, lm>
## 39  IAH <S3:gam, glm, lm>
## 40  ILM <S3:gam, glm, lm>
## 41  IND <S3:gam, glm, lm>
## 42  JAX <S3:gam, glm, lm>
## 43  LAS <S3:gam, glm, lm>
## 44  LAX <S3:gam, glm, lm>
## 45  LGB <S3:gam, glm, lm>
## 46  MCI <S3:gam, glm, lm>
## 47  MCO <S3:gam, glm, lm>
## 48  MDW <S3:gam, glm, lm>
## 49  MEM <S3:gam, glm, lm>
## 50  MHT <S3:gam, glm, lm>
## 51  MIA <S3:gam, glm, lm>
## 52  MKE <S3:gam, glm, lm>
## 53  MSN <S3:gam, glm, lm>
## 54  MSP <S3:gam, glm, lm>
## 55  MSY <S3:gam, glm, lm>
## 56  MVY <S3:gam, glm, lm>
## 57  OAK <S3:gam, glm, lm>
## 58  OKC <S3:gam, glm, lm>
## 59  OMA <S3:gam, glm, lm>
## 60  ORD <S3:gam, glm, lm>
## 61  ORF <S3:gam, glm, lm>
## 62  PBI <S3:gam, glm, lm>
## 63  PDX <S3:gam, glm, lm>
## 64  PHL <S3:gam, glm, lm>
## 65  PHX <S3:gam, glm, lm>
## 66  PIT <S3:gam, glm, lm>
## 67  PSE <S3:gam, glm, lm>
## 68  PVD <S3:gam, glm, lm>
## 69  PWM <S3:gam, glm, lm>
## 70  RDU <S3:gam, glm, lm>
## 71  RIC <S3:gam, glm, lm>
## 72  ROC <S3:gam, glm, lm>
## 73  RSW <S3:gam, glm, lm>
## 74  SAN <S3:gam, glm, lm>
## 75  SAT <S3:gam, glm, lm>
## 76  SAV <S3:gam, glm, lm>
## 77  SDF <S3:gam, glm, lm>
## 78  SEA <S3:gam, glm, lm>
## 79  SFO <S3:gam, glm, lm>
## 80  SJC <S3:gam, glm, lm>
## 81  SJU <S3:gam, glm, lm>
## 82  SLC <S3:gam, glm, lm>
## 83  SMF <S3:gam, glm, lm>
## 84  SNA <S3:gam, glm, lm>
## 85  SRQ <S3:gam, glm, lm>
## 86  STL <S3:gam, glm, lm>
## 87  STT <S3:gam, glm, lm>
## 88  SYR <S3:gam, glm, lm>
## 89  TPA <S3:gam, glm, lm>
## 90  TUL <S3:gam, glm, lm>
## 91  TVC <S3:gam, glm, lm>
## 92  TYS <S3:gam, glm, lm>
## 93  XNA <S3:gam, glm, lm>

——————————————

#Examples
library(dplyr)

if (require("RSQLite") && has_lahman("sqlite")) {
  batting <- tbl(lahman_sqlite(), "Batting")
  batting %>% show_query()
  batting %>% explain()
  # The batting database has indices on all ID variables:
  # SQLite automatically picks the most restrictive index
  batting %>% 
    filter(lgID == "NL" & yearID == 2000L) %>% 
    explain()
  # OR's will use multiple indexes
  batting %>% 
    filter(lgID == "NL" | yearID == 2000) %>% 
    explain()
  # Joins will use indexes in both tables
  teams <- tbl(lahman_sqlite(), "Teams")
  batting %>% 
    left_join(teams, c("yearID", "teamID")) %>%
    explain()
}
## <SQL>
## SELECT "playerID", "yearID", "stint", "teamID", "lgID", "G", "G_batting", "AB", "R", "H", "X2B", "X3B", "HR", "RBI", "SB", "CS", "BB", "SO", "IBB", "HBP", "SH", "SF", "GIDP", "G_old"
## FROM "Batting"
## <SQL>
## SELECT "playerID", "yearID", "stint", "teamID", "lgID", "G", "G_batting", "AB", "R", "H", "X2B", "X3B", "HR", "RBI", "SB", "CS", "BB", "SO", "IBB", "HBP", "SH", "SF", "GIDP", "G_old"
## FROM "Batting"
## 
## 
## <PLAN>
##   selectid order from             detail
## 1        0     0    0 SCAN TABLE Batting
## <SQL>
## SELECT "playerID", "yearID", "stint", "teamID", "lgID", "G", "G_batting", "AB", "R", "H", "X2B", "X3B", "HR", "RBI", "SB", "CS", "BB", "SO", "IBB", "HBP", "SH", "SF", "GIDP", "G_old"
## FROM "Batting"
## WHERE "lgID" = 'NL' AND "yearID" = 2000
## 
## 
## <PLAN>
##   selectid order from
## 1        0     0    0
##                                                       detail
## 1 SEARCH TABLE Batting USING INDEX Batting_yearID (yearID=?)
## <SQL>
## SELECT "playerID", "yearID", "stint", "teamID", "lgID", "G", "G_batting", "AB", "R", "H", "X2B", "X3B", "HR", "RBI", "SB", "CS", "BB", "SO", "IBB", "HBP", "SH", "SF", "GIDP", "G_old"
## FROM "Batting"
## WHERE "lgID" = 'NL' OR "yearID" = 2000.0
## 
## 
## <PLAN>
##   selectid order from
## 1        0     0    0
## 2        0     0    0
##                                                       detail
## 1     SEARCH TABLE Batting USING INDEX Batting_lgID (lgID=?)
## 2 SEARCH TABLE Batting USING INDEX Batting_yearID (yearID=?)
## <SQL>
## SELECT "playerID", "yearID", "stint", "teamID", "lgID.x", "G.x", "G_batting", "AB.x", "R.x", "H.x", "X2B.x", "X3B.x", "HR.x", "RBI", "SB.x", "CS.x", "BB.x", "SO.x", "IBB", "HBP.x", "SH", "SF.x", "GIDP", "G_old", "lgID.y", "franchID", "divID", "Rank", "G.y", "Ghome", "W", "L", "DivWin", "WCWin", "LgWin", "WSWin", "R.y", "AB.y", "H.y", "X2B.y", "X3B.y", "HR.y", "BB.y", "SO.y", "SB.y", "CS.y", "HBP.y", "SF.y", "RA", "ER", "ERA", "CG", "SHO", "SV", "IPouts", "HA", "HRA", "BBA", "SOA", "E", "DP", "FP", "name", "park", "attendance", "BPF", "PPF", "teamIDBR", "teamIDlahman45", "teamIDretro"
## FROM (SELECT * FROM (SELECT "playerID" AS "playerID", "yearID" AS "yearID", "stint" AS "stint", "teamID" AS "teamID", "lgID" AS "lgID.x", "G" AS "G.x", "G_batting" AS "G_batting", "AB" AS "AB.x", "R" AS "R.x", "H" AS "H.x", "X2B" AS "X2B.x", "X3B" AS "X3B.x", "HR" AS "HR.x", "RBI" AS "RBI", "SB" AS "SB.x", "CS" AS "CS.x", "BB" AS "BB.x", "SO" AS "SO.x", "IBB" AS "IBB", "HBP" AS "HBP.x", "SH" AS "SH", "SF" AS "SF.x", "GIDP" AS "GIDP", "G_old" AS "G_old"
## FROM "Batting") AS "_W2"
## 
## LEFT JOIN 
## 
## (SELECT "yearID" AS "yearID", "lgID" AS "lgID.y", "teamID" AS "teamID", "franchID" AS "franchID", "divID" AS "divID", "Rank" AS "Rank", "G" AS "G.y", "Ghome" AS "Ghome", "W" AS "W", "L" AS "L", "DivWin" AS "DivWin", "WCWin" AS "WCWin", "LgWin" AS "LgWin", "WSWin" AS "WSWin", "R" AS "R.y", "AB" AS "AB.y", "H" AS "H.y", "X2B" AS "X2B.y", "X3B" AS "X3B.y", "HR" AS "HR.y", "BB" AS "BB.y", "SO" AS "SO.y", "SB" AS "SB.y", "CS" AS "CS.y", "HBP" AS "HBP.y", "SF" AS "SF.y", "RA" AS "RA", "ER" AS "ER", "ERA" AS "ERA", "CG" AS "CG", "SHO" AS "SHO", "SV" AS "SV", "IPouts" AS "IPouts", "HA" AS "HA", "HRA" AS "HRA", "BBA" AS "BBA", "SOA" AS "SOA", "E" AS "E", "DP" AS "DP", "FP" AS "FP", "name" AS "name", "park" AS "park", "attendance" AS "attendance", "BPF" AS "BPF", "PPF" AS "PPF", "teamIDBR" AS "teamIDBR", "teamIDlahman45" AS "teamIDlahman45", "teamIDretro" AS "teamIDretro"
## FROM "Teams") AS "_W3"
## 
## USING ("yearID", "teamID")) AS "_W4"
## 
## 
## <PLAN>
##   selectid order from
## 1        1     0    0
## 2        0     0    0
## 3        0     1    1
##                                                                            detail
## 1                                                                SCAN TABLE Teams
## 2                                                              SCAN TABLE Batting
## 3 SEARCH SUBQUERY 1 AS _W3 USING AUTOMATIC COVERING INDEX (teamID=? AND yearID=?)
  #Examples
library(dplyr)
library(RSQLite)
library(DBI)
library(mgcv)
library(nlme)

  f <- function(x) if (x == 1) stop("Error!") else 1
  ## Not run:
  #f(1)
  f(2)
## [1] 1
  ## End(Not run)
  safef <- failwith(NULL, f)
  #safef(1)
  safef(2)
## [1] 1
 # Examples
  filter(mtcars, cyl == 8)
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb               model
## 1  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2   Hornet Sportabout
## 2  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4          Duster 360
## 3  16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3          Merc 450SE
## 4  17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3          Merc 450SL
## 5  15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3         Merc 450SLC
## 6  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4  Cadillac Fleetwood
## 7  10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4 Lincoln Continental
## 8  14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4   Chrysler Imperial
## 9  15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2    Dodge Challenger
## 10 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2         AMC Javelin
## 11 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4          Camaro Z28
## 12 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2    Pontiac Firebird
## 13 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4      Ford Pantera L
## 14 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8       Maserati Bora
  filter(mtcars, cyl < 6)
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb          model
## 1  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1     Datsun 710
## 2  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2      Merc 240D
## 3  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2       Merc 230
## 4  32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1       Fiat 128
## 5  30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2    Honda Civic
## 6  33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1 Toyota Corolla
## 7  21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1  Toyota Corona
## 8  27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1      Fiat X1-9
## 9  26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2  Porsche 914-2
## 10 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2   Lotus Europa
## 11 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2     Volvo 142E
 # Examples
  funs(mean, "mean", mean(., na.rm = TRUE))
## <fun_calls>
## $ mean: mean(.)
## $ mean: mean(.)
## $ mean: mean(., na.rm = TRUE)
  # Overide default names
  funs(m1 = mean, m2 = "mean", m3 = mean(., na.rm = TRUE))
## <fun_calls>
## $ m1: mean(.)
## $ m2: mean(.)
## $ m3: mean(., na.rm = TRUE)
  # If you have a function names in a vector, use funs_q
  fs <- c("min", "max")
  funs_(fs)
## <fun_calls>
## $ min: min(.)
## $ max: max(.)
 # Examples
  glimpse(mtcars)
## Variables:
## $ mpg   (dbl) 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19...
## $ cyl   (dbl) 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4,...
## $ disp  (dbl) 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, ...
## $ hp    (dbl) 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180,...
## $ drat  (dbl) 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3....
## $ wt    (dbl) 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, ...
## $ qsec  (dbl) 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, ...
## $ vs    (dbl) 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,...
## $ am    (dbl) 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,...
## $ gear  (dbl) 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4,...
## $ carb  (dbl) 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2,...
## $ model (chr) "Mazda RX4", "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Dr...
  if (require("RSQLite") && has_lahman("sqlite")) {
    batting <- tbl(lahman_sqlite(), "Batting")
    glimpse(batting)
  }
## Variables:
## $ playerID  (chr) "aardsda01", "aardsda01", "aardsda01", "aardsda01", ...
## $ yearID    (int) 2004, 2006, 2007, 2008, 2009, 2010, 2012, 1954, 1955...
## $ stint     (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ teamID    (chr) "SFN", "CHN", "CHA", "BOS", "SEA", "SEA", "NYA", "ML...
## $ lgID      (chr) "NL", "NL", "AL", "AL", "AL", "AL", "AL", "NL", "NL"...
## $ G         (int) 11, 45, 25, 47, 73, 53, 1, 122, 153, 153, 151, 153, ...
## $ G_batting (int) 11, 43, 2, 5, 3, 4, NA, 122, 153, 153, 151, 153, 154...
## $ AB        (int) 0, 2, 0, 1, 0, 0, NA, 468, 602, 609, 615, 601, 629, ...
## $ R         (int) 0, 0, 0, 0, 0, 0, NA, 58, 105, 106, 118, 109, 116, 1...
## $ H         (int) 0, 0, 0, 0, 0, 0, NA, 131, 189, 200, 198, 196, 223, ...
## $ X2B       (int) 0, 0, 0, 0, 0, 0, NA, 27, 37, 34, 27, 34, 46, 20, 39...
## $ X3B       (int) 0, 0, 0, 0, 0, 0, NA, 6, 9, 14, 6, 4, 7, 11, 10, 6, ...
## $ HR        (int) 0, 0, 0, 0, 0, 0, NA, 13, 27, 26, 44, 30, 39, 40, 34...
## $ RBI       (int) 0, 0, 0, 0, 0, 0, NA, 69, 106, 92, 132, 95, 123, 126...
## $ SB        (int) 0, 0, 0, 0, 0, 0, NA, 2, 3, 2, 1, 4, 8, 16, 21, 15, ...
## $ CS        (int) 0, 0, 0, 0, 0, 0, NA, 2, 1, 4, 1, 1, 0, 7, 9, 7, 5, ...
## $ BB        (int) 0, 0, 0, 0, 0, 0, NA, 28, 49, 37, 57, 59, 51, 60, 56...
## $ SO        (int) 0, 0, 0, 1, 0, 0, NA, 39, 61, 54, 58, 49, 54, 63, 64...
## $ IBB       (int) 0, 0, 0, 0, 0, 0, NA, NA, 5, 6, 15, 16, 17, 13, 20, ...
## $ HBP       (int) 0, 0, 0, 0, 0, 0, NA, 3, 3, 2, 0, 1, 4, 2, 2, 3, 0, ...
## $ SH        (int) 0, 1, 0, 0, 0, 0, NA, 6, 7, 5, 0, 0, 0, 0, 1, 0, 0, ...
## $ SF        (int) 0, 0, 0, 0, 0, 0, NA, 4, 4, 7, 3, 3, 9, 12, 9, 6, 5,...
## $ GIDP      (int) 0, 0, 0, 0, 0, 0, NA, 13, 20, 21, 13, 21, 19, 8, 16,...
## $ G_old     (int) 11, 45, 2, 5, NA, NA, NA, 122, 153, 153, 151, 153, 1...
  #Examples
  if (require("data.table") && require("nycflights13")) {
    flights_dt <- tbl_dt(flights)
    group_size(group_by(flights_dt, year, month, day))
    group_size(group_by(flights_dt, dest))
    monthly <- group_by(flights_dt, month)
    summarise(monthly, n = n(), delay = mean(arr_delay))
  }
## Loading required package: data.table
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, last
## Source: local data table [12 x 3]
## 
##    month     n delay
## 1      1 27004    NA
## 2      2 24951    NA
## 3      3 28834    NA
## 4      4 28330    NA
## 5      5 28796    NA
## 6      6 28243    NA
## 7      7 29425    NA
## 8      8 29327    NA
## 9      9 27574    NA
## 10    10 28889    NA
## 11    11 27268    NA
## 12    12 28135    NA
  #Examples
  grouped <- group_by(mtcars, cyl)
  groups(grouped)
## [[1]]
## cyl
  groups(ungroup(grouped))
## NULL

——————————————-

#Examples
by_cyl <- group_by(mtcars, cyl)
summarise(by_cyl, mean(disp), mean(hp))
## Source: local data frame [3 x 3]
## 
##   cyl mean(disp)  mean(hp)
## 1   4   105.1364  82.63636
## 2   6   183.3143 122.28571
## 3   8   353.1000 209.21429
filter(by_cyl, disp == max(disp))
## Source: local data frame [3 x 12]
## Groups: cyl
## 
##    mpg cyl  disp  hp drat    wt  qsec vs am gear carb              model
## 1 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1     Hornet 4 Drive
## 2 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2          Merc 240D
## 3 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4 Cadillac Fleetwood
# summarise peels off a single layer of grouping
by_vs_am <- group_by(mtcars, vs, am)
by_vs <- summarise(by_vs_am, n = n())
by_vs
## Source: local data frame [4 x 3]
## Groups: vs
## 
##   vs am  n
## 1  0  0 12
## 2  0  1  6
## 3  1  0  7
## 4  1  1  7
summarise(by_vs, n = sum(n))
## Source: local data frame [2 x 2]
## 
##   vs  n
## 1  0 18
## 2  1 14
# use ungroup() to remove if not wanted
summarise(ungroup(by_vs), n = sum(n))
## Source: local data frame [1 x 1]
## 
##    n
## 1 32
# You can group by expressions: this is just short-hand for
# a mutate/rename followed by a simple group_by
group_by(mtcars, vsam = vs + am)
## Source: local data frame [32 x 13]
## Groups: vsam
## 
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb               model
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4           Mazda RX4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4       Mazda RX4 Wag
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1          Datsun 710
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1      Hornet 4 Drive
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2   Hornet Sportabout
## 6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1             Valiant
## 7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4          Duster 360
## 8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2           Merc 240D
## 9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2            Merc 230
## 10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4            Merc 280
## 11 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4           Merc 280C
## 12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3          Merc 450SE
## 13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3          Merc 450SL
## 14 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3         Merc 450SLC
## 15 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4  Cadillac Fleetwood
## 16 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4 Lincoln Continental
## 17 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4   Chrysler Imperial
## 18 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1            Fiat 128
## 19 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2         Honda Civic
## 20 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1      Toyota Corolla
## 21 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1       Toyota Corona
## 22 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2    Dodge Challenger
## 23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2         AMC Javelin
## 24 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4          Camaro Z28
## 25 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2    Pontiac Firebird
## 26 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1           Fiat X1-9
## 27 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2       Porsche 914-2
## 28 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2        Lotus Europa
## 29 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4      Ford Pantera L
## 30 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6        Ferrari Dino
## 31 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8       Maserati Bora
## 32 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2          Volvo 142E
## Variables not shown: vsam (dbl)
group_by(mtcars, vs2 = vs)
## Source: local data frame [32 x 13]
## Groups: vs2
## 
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb               model
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4           Mazda RX4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4       Mazda RX4 Wag
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1          Datsun 710
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1      Hornet 4 Drive
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2   Hornet Sportabout
## 6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1             Valiant
## 7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4          Duster 360
## 8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2           Merc 240D
## 9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2            Merc 230
## 10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4            Merc 280
## 11 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4           Merc 280C
## 12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3          Merc 450SE
## 13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3          Merc 450SL
## 14 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3         Merc 450SLC
## 15 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4  Cadillac Fleetwood
## 16 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4 Lincoln Continental
## 17 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4   Chrysler Imperial
## 18 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1            Fiat 128
## 19 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2         Honda Civic
## 20 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1      Toyota Corolla
## 21 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1       Toyota Corona
## 22 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2    Dodge Challenger
## 23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2         AMC Javelin
## 24 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4          Camaro Z28
## 25 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2    Pontiac Firebird
## 26 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1           Fiat X1-9
## 27 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2       Porsche 914-2
## 28 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2        Lotus Europa
## 29 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4      Ford Pantera L
## 30 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6        Ferrari Dino
## 31 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8       Maserati Bora
## 32 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2          Volvo 142E
## Variables not shown: vs2 (dbl)
# You can also group by a constant, but it's not very useful
group_by(mtcars, "vs")
## Source: local data frame [32 x 13]
## Groups: "vs"
## 
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb               model
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4           Mazda RX4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4       Mazda RX4 Wag
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1          Datsun 710
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1      Hornet 4 Drive
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2   Hornet Sportabout
## 6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1             Valiant
## 7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4          Duster 360
## 8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2           Merc 240D
## 9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2            Merc 230
## 10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4            Merc 280
## 11 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4           Merc 280C
## 12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3          Merc 450SE
## 13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3          Merc 450SL
## 14 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3         Merc 450SLC
## 15 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4  Cadillac Fleetwood
## 16 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4 Lincoln Continental
## 17 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4   Chrysler Imperial
## 18 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1            Fiat 128
## 19 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2         Honda Civic
## 20 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1      Toyota Corolla
## 21 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1       Toyota Corona
## 22 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2    Dodge Challenger
## 23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2         AMC Javelin
## 24 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4          Camaro Z28
## 25 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2    Pontiac Firebird
## 26 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1           Fiat X1-9
## 27 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2       Porsche 914-2
## 28 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2        Lotus Europa
## 29 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4      Ford Pantera L
## 30 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6        Ferrari Dino
## 31 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8       Maserati Bora
## 32 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2          Volvo 142E
## Variables not shown: "vs" (chr)
# By default, group_by sets groups. Use add = TRUE to add groups
groups(group_by(by_cyl, vs, am))
## [[1]]
## vs
## 
## [[2]]
## am
groups(group_by(by_cyl, vs, am, add = TRUE))
## [[1]]
## cyl
## 
## [[2]]
## vs
## 
## [[3]]
## am
# Duplicate groups are silently dropped
groups(group_by(by_cyl, cyl, cyl))
## [[1]]
## cyl
#Examples
if (require("nycflights13")) {
  by_day <- flights %>% group_by(year, month, day)
  n_groups(by_day)
  group_size(by_day)
  by_dest <- flights %>% group_by(dest)
  n_groups(by_dest)
  group_size(by_dest)
}
##   [1]   254   265   439     8 17215  2439   275   443   375   297  6333
##  [12] 15508   896  2589  4681   371  1781    36   116   864    52  2884
##  [23]  4573 14064  3524   138  3941  1525  9705  7266  8738   569  9384
##  [34]   213    17 12055   765  1606   849    15   707  2115  5700  7198
##  [45]   110  2077    25  2720  5997 16174     1     1   668  2008 14082
##  [56]  4113  1789  1009 11728  2802   572  7185  3799    15   221    59
##  [67]   312   346   849 17283  1536  6554  1354  1632  4656  2875   365
##  [78]    19   376  2352  8163  2454  2416  3537  2737   686   804    10
##  [89]  1157  3923 13331   329  5819  2467   284   825  1211  4339   522
## [100]  1761  7466   315   101   631  1036
#Examples
if (require("Lahman")) {
  batting_df <- tbl_df(Batting)
  person_df <- tbl_df(Master)
  uperson_df <- tbl_df(Master[!duplicated(Master$playerID), ])
  # Inner join: match batting and person data
  inner_join(batting_df, person_df)
  inner_join(batting_df, uperson_df)
  # Left join: match, but preserve batting data
  left_join(batting_df, uperson_df)
}  
## Loading required package: Lahman
## Joining by: "playerID"
## Joining by: "playerID"
## Joining by: "playerID"
## Source: local data frame [97,889 x 49]
## 
##     playerID yearID stint teamID lgID   G G_batting  AB   R   H X2B X3B HR
## 1  aardsda01   2004     1    SFN   NL  11        11   0   0   0   0   0  0
## 2  aardsda01   2006     1    CHN   NL  45        43   2   0   0   0   0  0
## 3  aardsda01   2007     1    CHA   AL  25         2   0   0   0   0   0  0
## 4  aardsda01   2008     1    BOS   AL  47         5   1   0   0   0   0  0
## 5  aardsda01   2009     1    SEA   AL  73         3   0   0   0   0   0  0
## 6  aardsda01   2010     1    SEA   AL  53         4   0   0   0   0   0  0
## 7  aardsda01   2012     1    NYA   AL   1        NA  NA  NA  NA  NA  NA NA
## 8  aaronha01   1954     1    ML1   NL 122       122 468  58 131  27   6 13
## 9  aaronha01   1955     1    ML1   NL 153       153 602 105 189  37   9 27
## 10 aaronha01   1956     1    ML1   NL 153       153 609 106 200  34  14 26
## ..       ...    ...   ...    ...  ... ...       ... ... ... ... ... ... ..
## Variables not shown: RBI (int), SB (int), CS (int), BB (int), SO (int),
##   IBB (int), HBP (int), SH (int), SF (int), GIDP (int), G_old (int),
##   birthYear (int), birthMonth (int), birthDay (int), birthCountry (chr),
##   birthState (chr), birthCity (chr), deathYear (int), deathMonth (int),
##   deathDay (int), deathCountry (chr), deathState (chr), deathCity (chr),
##   nameFirst (chr), nameLast (chr), nameGiven (chr), weight (int), height
##   (int), bats (fctr), throws (fctr), debut (chr), finalGame (chr), retroID
##   (chr), bbrefID (chr), deathDate (date), birthDate (date)
  #Examples
  if (require("Lahman")) {
    batting_df <- tbl_df(Batting)
    person_df <- tbl_df(Master)
    uperson_df <- tbl_df(Master[!duplicated(Master$playerID), ])
    # Inner join: match batting and person data
    inner_join(batting_df, person_df)
    inner_join(batting_df, uperson_df)
    # Left join: match, but preserve batting data
    left_join(batting_df, uperson_df)
  }    
## Joining by: "playerID"
## Joining by: "playerID"
## Joining by: "playerID"
## Source: local data frame [97,889 x 49]
## 
##     playerID yearID stint teamID lgID   G G_batting  AB   R   H X2B X3B HR
## 1  aardsda01   2004     1    SFN   NL  11        11   0   0   0   0   0  0
## 2  aardsda01   2006     1    CHN   NL  45        43   2   0   0   0   0  0
## 3  aardsda01   2007     1    CHA   AL  25         2   0   0   0   0   0  0
## 4  aardsda01   2008     1    BOS   AL  47         5   1   0   0   0   0  0
## 5  aardsda01   2009     1    SEA   AL  73         3   0   0   0   0   0  0
## 6  aardsda01   2010     1    SEA   AL  53         4   0   0   0   0   0  0
## 7  aardsda01   2012     1    NYA   AL   1        NA  NA  NA  NA  NA  NA NA
## 8  aaronha01   1954     1    ML1   NL 122       122 468  58 131  27   6 13
## 9  aaronha01   1955     1    ML1   NL 153       153 602 105 189  37   9 27
## 10 aaronha01   1956     1    ML1   NL 153       153 609 106 200  34  14 26
## ..       ...    ...   ...    ...  ... ...       ... ... ... ... ... ... ..
## Variables not shown: RBI (int), SB (int), CS (int), BB (int), SO (int),
##   IBB (int), HBP (int), SH (int), SF (int), GIDP (int), G_old (int),
##   birthYear (int), birthMonth (int), birthDay (int), birthCountry (chr),
##   birthState (chr), birthCity (chr), deathYear (int), deathMonth (int),
##   deathDay (int), deathCountry (chr), deathState (chr), deathCity (chr),
##   nameFirst (chr), nameLast (chr), nameGiven (chr), weight (int), height
##   (int), bats (fctr), throws (fctr), debut (chr), finalGame (chr), retroID
##   (chr), bbrefID (chr), deathDate (date), birthDate (date)
    #Examples
    if (require("data.table") && require("Lahman")) {
      batting_dt <- tbl_dt(Batting)
      person_dt <- tbl_dt(Master)
      # Inner join: match batting and person data
      inner_join(batting_dt, person_dt)
      # Left join: keep batting data even if person missing
      left_join(batting_dt, person_dt)
      # Semi-join: find batting data for top 4 teams, 2010:2012
      grid <- expand.grid(
        teamID = c("WAS", "ATL", "PHI", "NYA"),
        yearID = 2010:2012)
      top4 <- semi_join(batting_dt, grid, copy = TRUE)
      # Anti-join: find batting data with out player data
      anti_join(batting_dt, person_dt)
    }
## Joining by: "playerID"
## Joining by: "playerID"
## Joining by: c("yearID", "teamID")
## Joining by: "playerID"
## Source: local data table [0 x 24]
#Examples
library(dplyr)
library(RSQLite)
library(DBI)
library(mgcv)
library(nlme)
library(data.table)
library(Lahman)

#if (require("RSQLite") && has_lahman("sqlite")) {
  # Left joins ----------------------------------------------------------------
  batting <- tbl(lahman_sqlite(), "Batting")
head(batting,5)
##    playerID yearID stint teamID lgID  G G_batting AB R H X2B X3B HR RBI SB
## 1 aardsda01   2004     1    SFN   NL 11        11  0 0 0   0   0  0   0  0
## 2 aardsda01   2006     1    CHN   NL 45        43  2 0 0   0   0  0   0  0
## 3 aardsda01   2007     1    CHA   AL 25         2  0 0 0   0   0  0   0  0
## 4 aardsda01   2008     1    BOS   AL 47         5  1 0 0   0   0  0   0  0
## 5 aardsda01   2009     1    SEA   AL 73         3  0 0 0   0   0  0   0  0
##   CS BB SO IBB HBP SH SF GIDP G_old
## 1  0  0  0   0   0  0  0    0    11
## 2  0  0  0   0   0  1  0    0    45
## 3  0  0  0   0   0  0  0    0     2
## 4  0  0  1   0   0  0  0    0     5
## 5  0  0  0   0   0  0  0    0    NA
  team_info <- select(tbl(lahman_sqlite(), "Teams"), yearID, lgID, teamID, G, R:H)
head(team_info,5)
##   yearID lgID teamID  G   R   AB   H
## 1   1871   NA    PH1 28 376 1281 410
## 2   1871   NA    CH1 28 302 1196 323
## 3   1871   NA    BS1 31 401 1372 426
## 4   1871   NA    WS3 32 310 1353 375
## 5   1871   NA    NY2 33 302 1404 403
  # Combine player and whole team statistics
  first_stint <- select(filter(batting, stint == 1), playerID:H)
head(first_stint,5)
##    playerID yearID stint teamID lgID  G G_batting AB R H
## 1 aardsda01   2004     1    SFN   NL 11        11  0 0 0
## 2 aardsda01   2006     1    CHN   NL 45        43  2 0 0
## 3 aardsda01   2007     1    CHA   AL 25         2  0 0 0
## 4 aardsda01   2008     1    BOS   AL 47         5  1 0 0
## 5 aardsda01   2009     1    SEA   AL 73         3  0 0 0
  both <- left_join(first_stint, team_info, type = "inner", by = c("yearID", "teamID", "lgID"))
  head(both)
##    playerID yearID stint teamID lgID G.x G_batting AB.x R.x H.x G.y R.y
## 1 aardsda01   2004     1    SFN   NL  11        11    0   0   0 162 850
## 2 aardsda01   2006     1    CHN   NL  45        43    2   0   0 162 716
## 3 aardsda01   2007     1    CHA   AL  25         2    0   0   0 162 693
## 4 aardsda01   2008     1    BOS   AL  47         5    1   0   0 162 845
## 5 aardsda01   2009     1    SEA   AL  73         3    0   0   0 162 640
## 6 aardsda01   2010     1    SEA   AL  53         4    0   0   0 162 513
##   AB.y  H.y
## 1 5546 1500
## 2 5587 1496
## 3 5441 1341
## 4 5596 1565
## 5 5543 1430
## 6 5409 1274
  #explain(both)

  # Join with a local data frame
  grid <- expand.grid(
    teamID = c("WAS", "ATL", "PHI", "NYA"),
    yearID = 2010:2012)
grid
##    teamID yearID
## 1     WAS   2010
## 2     ATL   2010
## 3     PHI   2010
## 4     NYA   2010
## 5     WAS   2011
## 6     ATL   2011
## 7     PHI   2011
## 8     NYA   2011
## 9     WAS   2012
## 10    ATL   2012
## 11    PHI   2012
## 12    NYA   2012
  top4a <- left_join(batting, grid, copy = TRUE)
## Joining by: c("yearID", "teamID")
head(top4a)
##    playerID yearID stint teamID lgID  G G_batting AB R H X2B X3B HR RBI SB
## 1 aardsda01   2004     1    SFN   NL 11        11  0 0 0   0   0  0   0  0
## 2 aardsda01   2006     1    CHN   NL 45        43  2 0 0   0   0  0   0  0
## 3 aardsda01   2007     1    CHA   AL 25         2  0 0 0   0   0  0   0  0
## 4 aardsda01   2008     1    BOS   AL 47         5  1 0 0   0   0  0   0  0
## 5 aardsda01   2009     1    SEA   AL 73         3  0 0 0   0   0  0   0  0
## 6 aardsda01   2010     1    SEA   AL 53         4  0 0 0   0   0  0   0  0
##   CS BB SO IBB HBP SH SF GIDP G_old
## 1  0  0  0   0   0  0  0    0    11
## 2  0  0  0   0   0  1  0    0    45
## 3  0  0  0   0   0  0  0    0     2
## 4  0  0  1   0   0  0  0    0     5
## 5  0  0  0   0   0  0  0    0    NA
## 6  0  0  0   0   0  0  0    0    NA
#  explain(top4a)
  # Indices don't really help here because there's no matching index on
  # batting
 # top4b <- left_join(batting, grid, copy = TRUE, auto_index = TRUE)
#head(top4b)
#  explain(top4b)

  # Semi-joins 
  people <- tbl(lahman_sqlite(), "Master")
head(people)
##    playerID birthYear birthMonth birthDay birthCountry birthState
## 1 aardsda01      1981         12       27          USA         CO
## 2 aaronha01      1934          2        5          USA         AL
## 3 aaronto01      1939          8        5          USA         AL
## 4  aasedo01      1954          9        8          USA         CA
## 5  abadan01      1972          8       25          USA         FL
## 6  abadfe01      1985         12       17         D.R.  La Romana
##    birthCity deathYear deathMonth deathDay deathCountry deathState
## 1     Denver        NA         NA       NA         <NA>       <NA>
## 2     Mobile        NA         NA       NA         <NA>       <NA>
## 3     Mobile      1984          8       16          USA         GA
## 4     Orange        NA         NA       NA         <NA>       <NA>
## 5 Palm Beach        NA         NA       NA         <NA>       <NA>
## 6  La Romana        NA         NA       NA         <NA>       <NA>
##   deathCity nameFirst nameLast        nameGiven weight height bats throws
## 1      <NA>     David  Aardsma      David Allan    205     75    R      R
## 2      <NA>      Hank    Aaron      Henry Louis    180     72    R      R
## 3   Atlanta    Tommie    Aaron       Tommie Lee    190     75    R      R
## 4      <NA>       Don     Aase   Donald William    190     75    R      R
## 5      <NA>      Andy     Abad    Fausto Andres    184     73    L      L
## 6      <NA>  Fernando     Abad Fernando Antonio    220     73    L      L
##        debut  finalGame  retroID   bbrefID deathDate birthDate
## 1 2004-04-06 2013-09-28 aardd001 aardsda01        NA      4378
## 2 1954-04-13 1976-10-03 aaroh101 aaronha01        NA    -13114
## 3 1962-04-10 1971-09-26 aarot101 aaronto01      5341    -11107
## 4 1977-07-26 1990-10-03 aased001  aasedo01        NA     -5594
## 5 2001-09-10 2006-04-13 abada001  abadan01        NA       967
## 6 2010-07-28 2013-09-27 abadf001  abadfe01        NA      5829
  # All people in half of fame
  hof <- tbl(lahman_sqlite(), "HallOfFame")
head(hof)
##    playerID yearID votedBy ballots needed votes inducted category
## 1  cobbty01   1936   BBWAA     226    170   222        Y   Player
## 2  ruthba01   1936   BBWAA     226    170   215        Y   Player
## 3 wagneho01   1936   BBWAA     226    170   215        Y   Player
## 4 mathech01   1936   BBWAA     226    170   205        Y   Player
## 5 johnswa01   1936   BBWAA     226    170   189        Y   Player
## 6 lajoina01   1936   BBWAA     226    170   146        N   Player
##   needed_note
## 1        <NA>
## 2        <NA>
## 3        <NA>
## 4        <NA>
## 5        <NA>
## 6        <NA>
  semi_join(people, hof)
## Joining by: "playerID"
## Source: sqlite 3.8.6 [C:\Users\SHIMAD~1\AppData\Local\Temp\RtmpMfAkMc/lahman.sqlite]
## From: <derived table> [?? x 26]
## 
##     playerID birthYear birthMonth birthDay birthCountry birthState
## 1  aaronha01      1934          2        5          USA         AL
## 2  abbotji01      1967          9       19          USA         MI
## 3  adamsba01      1882          5       18          USA         IN
## 4  adamsbo03      1921         12       14          USA         CA
## 5  adamssp01      1894          8       26          USA         PA
## 6   ageeto01      1942          8        9          USA         AL
## 7  aguilri01      1961         12       31          USA         CA
## 8   akerja01      1940          7       13          USA         CA
## 9  alexado01      1950          9        4          USA         AL
## 10 alexape01      1887          2       26          USA         NE
## ..       ...       ...        ...      ...          ...        ...
## Variables not shown: birthCity (chr), deathYear (int), deathMonth (int),
##   deathDay (int), deathCountry (chr), deathState (chr), deathCity (chr),
##   nameFirst (chr), nameLast (chr), nameGiven (chr), weight (int), height
##   (int), bats (chr), throws (chr), debut (chr), finalGame (chr), retroID
##   (chr), bbrefID (chr), deathDate (dbl), birthDate (dbl)
  anti_join(people, hof)
## Joining by: "playerID"
## Source: sqlite 3.8.6 [C:\Users\SHIMAD~1\AppData\Local\Temp\RtmpMfAkMc/lahman.sqlite]
## From: <derived table> [?? x 26]
## 
##     playerID birthYear birthMonth birthDay birthCountry birthState
## 1  aardsda01      1981         12       27          USA         CO
## 2  aaronto01      1939          8        5          USA         AL
## 3   aasedo01      1954          9        8          USA         CA
## 4   abadan01      1972          8       25          USA         FL
## 5   abadfe01      1985         12       17         D.R.  La Romana
## 6  abadijo01      1854         11        4          USA         PA
## 7  abbated01      1877          4       15          USA         PA
## 8  abbeybe01      1869         11       11          USA         VT
## 9  abbeych01      1866         10       14          USA         NE
## 10 abbotda01      1862          3       16          USA         OH
## ..       ...       ...        ...      ...          ...        ...
## Variables not shown: birthCity (chr), deathYear (int), deathMonth (int),
##   deathDay (int), deathCountry (chr), deathState (chr), deathCity (chr),
##   nameFirst (chr), nameLast (chr), nameGiven (chr), weight (int), height
##   (int), bats (chr), throws (chr), debut (chr), finalGame (chr), retroID
##   (chr), bbrefID (chr), deathDate (dbl), birthDate (dbl)
  # Find all managers
  manager <- tbl(lahman_sqlite(), "Managers")
head(manager)
##    playerID yearID teamID lgID inseason  G  W  L rank plyrMgr
## 1 wrighha01   1871    BS1   NA        1 31 20 10    3       Y
## 2  woodji01   1871    CH1   NA        1 28 19  9    2       Y
## 3 paborch01   1871    CL1   NA        1 29 10 19    8       Y
## 4 lennobi01   1871    FW1   NA        1 14  5  9    8       Y
## 5 deaneha01   1871    FW1   NA        2  5  2  3    8       Y
## 6 fergubo01   1871    NY2   NA        1 33 16 17    5       Y
  # Find all managers in hall of fame
  famous_manager <- semi_join(semi_join(people, manager), hof)
## Joining by: "playerID"
## Joining by: "playerID"
  famous_manager
## Source: sqlite 3.8.6 [C:\Users\SHIMAD~1\AppData\Local\Temp\RtmpMfAkMc/lahman.sqlite]
## From: <derived table> [?? x 26]
## 
##     playerID birthYear birthMonth birthDay   birthCountry    birthState
## 1  allisdo01      1846          7       12            USA            PA
## 2  alomasa02      1966          6       18           P.R.            NA
## 3   aloufe01      1935          5       12           D.R. San Cristobal
## 4  alstowa01      1911         12        1            USA            OH
## 5  andersp01      1934          2       22            USA            SD
## 6  ansonca01      1852          4       17            USA            IA
## 7  applilu01      1907          4        2            USA            NC
## 8  austiji01      1879         12        8 United Kingdom       Swansea
## 9  bakerdu01      1949          6       15            USA            CA
## 10 bancrda01      1891          4       20            USA            IA
## ..       ...       ...        ...      ...            ...           ...
## Variables not shown: birthCity (chr), deathYear (int), deathMonth (int),
##   deathDay (int), deathCountry (chr), deathState (chr), deathCity (chr),
##   nameFirst (chr), nameLast (chr), nameGiven (chr), weight (int), height
##   (int), bats (chr), throws (chr), debut (chr), finalGame (chr), retroID
##   (chr), bbrefID (chr), deathDate (dbl), birthDate (dbl)
#  explain(famous_manager)
  # Anti-joins ----------------------------------------------------------------
  # batters without person covariates
 #anti_join(batting, people)
#}
#Examples
lead(1:10, 1)
##  [1]  2  3  4  5  6  7  8  9 10 NA
lead(1:10, 2)
##  [1]  3  4  5  6  7  8  9 10 NA NA
lag(1:10, 1)
##  [1] NA  1  2  3  4  5  6  7  8  9
lead(1:10, 1)
##  [1]  2  3  4  5  6  7  8  9 10 NA
x <- runif(5)
cbind(ahead = lead(x), x, behind = lag(x))
##          ahead         x    behind
## [1,] 0.2664911 0.1326000        NA
## [2,] 0.0858742 0.2664911 0.1326000
## [3,] 0.4120477 0.0858742 0.2664911
## [4,] 0.9449989 0.4120477 0.0858742
## [5,]        NA 0.9449989 0.4120477
# Use order_by if data not already ordered
df <- data.frame(year = 2000:2005, value = (0:5) ^ 2)
scrambled <- df[sample(nrow(df)), ]
wrong <- mutate(scrambled, prev = lag(value))
arrange(wrong, year)
##   year value prev
## 1 2000     0    1
## 2 2001     1   16
## 3 2002     4    9
## 4 2003     9    0
## 5 2004    16   25
## 6 2005    25   NA
right <- mutate(scrambled, prev = lag(value, order_by = year))
arrange(right, year)
##   year value prev
## 1 2000     0   NA
## 2 2001     1    0
## 3 2002     4    1
## 4 2003     9    4
## 5 2004    16    9
## 6 2005    25   16
#Examples
location(mtcars)
## <05CF3A20>
## Variables:
##  * mpg:       <03D70308>
##  * cyl:       <04D50D40>
##  * disp:      <03BD45E8>
##  * hp:        <0391C8E0>
##  * drat:      <02331CF8>
##  * wt:        <0384D650>
##  * qsec:      <03292E50>
##  * vs:        <03AE4BA0>
##  * am:        <022EC4E8>
##  * gear:      <039A0BA8>
##  * carb:      <035BD618>
##  * model:     <05FCE8D8>
## Attributes:
##  * names:     <05CF3A78>
##  * row.names: <05FCE8D8>
##  * class:     <05F00728>
mtcars2 <- mutate(mtcars, cyl2 = cyl * 2)
location(mtcars2)
## <12BB4E50>
## Variables:
##  * mpg:       <03D70308>
##  * cyl:       <04D50D40>
##  * disp:      <03BD45E8>
##  * hp:        <0391C8E0>
##  * drat:      <02331CF8>
##  * wt:        <0384D650>
##  * qsec:      <03292E50>
##  * vs:        <03AE4BA0>
##  * am:        <022EC4E8>
##  * gear:      <039A0BA8>
##  * carb:      <035BD618>
##  * model:     <05FCE8D8>
##  * cyl2:      <0CE30990>
## Attributes:
##  * class:     <0C30F8C8>
##  * names:     <12BB4DF8>
##  * row.names: <11649B20>
changes(mtcars, mtcars)
## <identical>
changes(mtcars, mtcars2)
## Changed variables:
##           old     new     
## cyl2      <added> 0CE30990
## 
## Changed attributes:
##           old      new     
## names     05CF3A78 12BB4DF8
## row.names 05FCE8D8 0D5FF9D8
## class     05F00728 0C30F8C8