Introduction to dplyr Functions in R

Illya Mowerman

Dataset Introduction: mtcars

head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

1. Filter

Purpose: Subset rows based on conditions

# Keep only cars with more than 6 cylinders and mpg greater than 15
high_power_cars <- mtcars %>%  
  filter(cyl > 6 & mpg > 15)
print(high_power_cars)
##                    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Merc 450SE        16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL        17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC       15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Dodge Challenger  15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin       15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Pontiac Firebird  19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Ford Pantera L    15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4

2. Select

Purpose: Choose specific columns

# Select only the mpg, hp, and wt columns
efficiency_power <- mtcars %>% 
  select(mpg, hp, wt)
print(head(efficiency_power))
##                    mpg  hp    wt
## Mazda RX4         21.0 110 2.620
## Mazda RX4 Wag     21.0 110 2.875
## Datsun 710        22.8  93 2.320
## Hornet 4 Drive    21.4 110 3.215
## Hornet Sportabout 18.7 175 3.440
## Valiant           18.1 105 3.460

3. Rename

Purpose: Change column names

# Rename 'mpg' to 'miles_per_gallon' and 'hp' to 'horsepower'
renamed_cars  <- mtcars %>%  
  rename(miles_per_gallon = mpg, horsepower = hp)
print(head(renamed_cars))
##                   miles_per_gallon cyl disp horsepower drat    wt  qsec vs am
## Mazda RX4                     21.0   6  160        110 3.90 2.620 16.46  0  1
## Mazda RX4 Wag                 21.0   6  160        110 3.90 2.875 17.02  0  1
## Datsun 710                    22.8   4  108         93 3.85 2.320 18.61  1  1
## Hornet 4 Drive                21.4   6  258        110 3.08 3.215 19.44  1  0
## Hornet Sportabout             18.7   8  360        175 3.15 3.440 17.02  0  0
## Valiant                       18.1   6  225        105 2.76 3.460 20.22  1  0
##                   gear carb
## Mazda RX4            4    4
## Mazda RX4 Wag        4    4
## Datsun 710           4    1
## Hornet 4 Drive       3    1
## Hornet Sportabout    3    2
## Valiant              3    1

4. Mutate

Purpose: Create new columns or modify existing ones

# Create a new column 'efficiency_ratio' (mpg / hp)
cars_with_ratio <- mtcars %>% 
  mutate(efficiency_ratio = mpg / hp)
print(select(cars_with_ratio, mpg, hp, efficiency_ratio))
##                      mpg  hp efficiency_ratio
## Mazda RX4           21.0 110       0.19090909
## Mazda RX4 Wag       21.0 110       0.19090909
## Datsun 710          22.8  93       0.24516129
## Hornet 4 Drive      21.4 110       0.19454545
## Hornet Sportabout   18.7 175       0.10685714
## Valiant             18.1 105       0.17238095
## Duster 360          14.3 245       0.05836735
## Merc 240D           24.4  62       0.39354839
## Merc 230            22.8  95       0.24000000
## Merc 280            19.2 123       0.15609756
## Merc 280C           17.8 123       0.14471545
## Merc 450SE          16.4 180       0.09111111
## Merc 450SL          17.3 180       0.09611111
## Merc 450SLC         15.2 180       0.08444444
## Cadillac Fleetwood  10.4 205       0.05073171
## Lincoln Continental 10.4 215       0.04837209
## Chrysler Imperial   14.7 230       0.06391304
## Fiat 128            32.4  66       0.49090909
## Honda Civic         30.4  52       0.58461538
## Toyota Corolla      33.9  65       0.52153846
## Toyota Corona       21.5  97       0.22164948
## Dodge Challenger    15.5 150       0.10333333
## AMC Javelin         15.2 150       0.10133333
## Camaro Z28          13.3 245       0.05428571
## Pontiac Firebird    19.2 175       0.10971429
## Fiat X1-9           27.3  66       0.41363636
## Porsche 914-2       26.0  91       0.28571429
## Lotus Europa        30.4 113       0.26902655
## Ford Pantera L      15.8 264       0.05984848
## Ferrari Dino        19.7 175       0.11257143
## Maserati Bora       15.0 335       0.04477612
## Volvo 142E          21.4 109       0.19633028

5. Sort (arrange)

Purpose: Order rows based on column values

# Sort cars by mpg in descending order, then by horsepower in ascending order
sorted_cars <- mtcars %>%  
  arrange(desc(mpg), hp)
print(select(sorted_cars, mpg, hp))
##                      mpg  hp
## Toyota Corolla      33.9  65
## Fiat 128            32.4  66
## Honda Civic         30.4  52
## Lotus Europa        30.4 113
## Fiat X1-9           27.3  66
## Porsche 914-2       26.0  91
## Merc 240D           24.4  62
## Datsun 710          22.8  93
## Merc 230            22.8  95
## Toyota Corona       21.5  97
## Volvo 142E          21.4 109
## Hornet 4 Drive      21.4 110
## Mazda RX4           21.0 110
## Mazda RX4 Wag       21.0 110
## Ferrari Dino        19.7 175
## Merc 280            19.2 123
## Pontiac Firebird    19.2 175
## Hornet Sportabout   18.7 175
## Valiant             18.1 105
## Merc 280C           17.8 123
## Merc 450SL          17.3 180
## Merc 450SE          16.4 180
## Ford Pantera L      15.8 264
## Dodge Challenger    15.5 150
## AMC Javelin         15.2 150
## Merc 450SLC         15.2 180
## Maserati Bora       15.0 335
## Chrysler Imperial   14.7 230
## Duster 360          14.3 245
## Camaro Z28          13.3 245
## Cadillac Fleetwood  10.4 205
## Lincoln Continental 10.4 215

Conclusion

These functions can be combined using the pipe operator %>% for efficient data manipulation:

mtcars %>%
  filter(cyl == 8) %>%
  select(mpg, hp, wt) %>%
  mutate(power_to_weight = hp / wt) %>%
  arrange(desc(power_to_weight)) %>%
  head(5)
##                    mpg  hp   wt power_to_weight
## Maserati Bora     15.0 335 3.57        93.83754
## Ford Pantera L    15.8 264 3.17        83.28076
## Duster 360        14.3 245 3.57        68.62745
## Camaro Z28        13.3 245 3.84        63.80208
## Hornet Sportabout 18.7 175 3.44        50.87209

Practice and real-world examples are key to mastering these functions!