import data

flights
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 336,766 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>

Filter rows

filter(flights, month == 1, day ==1)
## # A tibble: 842 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 832 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>
filter(flights, month == 1 & day == 1)
## # A tibble: 842 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 832 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>
filter(flights, month == 1 | day == 1)
## # A tibble: 37,198 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 37,188 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>
filter(flights, month %in% c(11,12))
## # A tibble: 55,403 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013    11     1        5           2359         6      352            345
##  2  2013    11     1       35           2250       105      123           2356
##  3  2013    11     1      455            500        -5      641            651
##  4  2013    11     1      539            545        -6      856            827
##  5  2013    11     1      542            545        -3      831            855
##  6  2013    11     1      549            600       -11      912            923
##  7  2013    11     1      550            600       -10      705            659
##  8  2013    11     1      554            600        -6      659            701
##  9  2013    11     1      554            600        -6      826            827
## 10  2013    11     1      554            600        -6      749            751
## # ℹ 55,393 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>

Arrange rows

arrange(flights, desc(month), desc(day))
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013    12    31       13           2359        14      439            437
##  2  2013    12    31       18           2359        19      449            444
##  3  2013    12    31       26           2245       101      129           2353
##  4  2013    12    31      459            500        -1      655            651
##  5  2013    12    31      514            515        -1      814            812
##  6  2013    12    31      549            551        -2      925            900
##  7  2013    12    31      550            600       -10      725            745
##  8  2013    12    31      552            600        -8      811            826
##  9  2013    12    31      553            600        -7      741            754
## 10  2013    12    31      554            550         4     1024           1027
## # ℹ 336,766 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>

Select columns

select(flights, year: dep_time)
## # A tibble: 336,776 × 4
##     year month   day dep_time
##    <int> <int> <int>    <int>
##  1  2013     1     1      517
##  2  2013     1     1      533
##  3  2013     1     1      542
##  4  2013     1     1      544
##  5  2013     1     1      554
##  6  2013     1     1      554
##  7  2013     1     1      555
##  8  2013     1     1      557
##  9  2013     1     1      557
## 10  2013     1     1      558
## # ℹ 336,766 more rows
select (flights, year, month, day, dep_time)
## # A tibble: 336,776 × 4
##     year month   day dep_time
##    <int> <int> <int>    <int>
##  1  2013     1     1      517
##  2  2013     1     1      533
##  3  2013     1     1      542
##  4  2013     1     1      544
##  5  2013     1     1      554
##  6  2013     1     1      554
##  7  2013     1     1      555
##  8  2013     1     1      557
##  9  2013     1     1      557
## 10  2013     1     1      558
## # ℹ 336,766 more rows
select (flights, year, month, day, dep_time, dep_delay)
## # A tibble: 336,776 × 5
##     year month   day dep_time dep_delay
##    <int> <int> <int>    <int>     <dbl>
##  1  2013     1     1      517         2
##  2  2013     1     1      533         4
##  3  2013     1     1      542         2
##  4  2013     1     1      544        -1
##  5  2013     1     1      554        -6
##  6  2013     1     1      554        -4
##  7  2013     1     1      555        -5
##  8  2013     1     1      557        -3
##  9  2013     1     1      557        -3
## 10  2013     1     1      558        -2
## # ℹ 336,766 more rows
select (flights, year, month, day, starts_with("dep"))
## # A tibble: 336,776 × 5
##     year month   day dep_time dep_delay
##    <int> <int> <int>    <int>     <dbl>
##  1  2013     1     1      517         2
##  2  2013     1     1      533         4
##  3  2013     1     1      542         2
##  4  2013     1     1      544        -1
##  5  2013     1     1      554        -6
##  6  2013     1     1      554        -4
##  7  2013     1     1      555        -5
##  8  2013     1     1      557        -3
##  9  2013     1     1      557        -3
## 10  2013     1     1      558        -2
## # ℹ 336,766 more rows
select (flights, year, month, day, contains("time"))
## # A tibble: 336,776 × 9
##     year month   day dep_time sched_dep_time arr_time sched_arr_time air_time
##    <int> <int> <int>    <int>          <int>    <int>          <int>    <dbl>
##  1  2013     1     1      517            515      830            819      227
##  2  2013     1     1      533            529      850            830      227
##  3  2013     1     1      542            540      923            850      160
##  4  2013     1     1      544            545     1004           1022      183
##  5  2013     1     1      554            600      812            837      116
##  6  2013     1     1      554            558      740            728      150
##  7  2013     1     1      555            600      913            854      158
##  8  2013     1     1      557            600      709            723       53
##  9  2013     1     1      557            600      838            846      140
## 10  2013     1     1      558            600      753            745      138
## # ℹ 336,766 more rows
## # ℹ 1 more variable: time_hour <dttm>
select (flights, year, month, day, ends_with("time"))
## # A tibble: 336,776 × 8
##     year month   day dep_time sched_dep_time arr_time sched_arr_time air_time
##    <int> <int> <int>    <int>          <int>    <int>          <int>    <dbl>
##  1  2013     1     1      517            515      830            819      227
##  2  2013     1     1      533            529      850            830      227
##  3  2013     1     1      542            540      923            850      160
##  4  2013     1     1      544            545     1004           1022      183
##  5  2013     1     1      554            600      812            837      116
##  6  2013     1     1      554            558      740            728      150
##  7  2013     1     1      555            600      913            854      158
##  8  2013     1     1      557            600      709            723       53
##  9  2013     1     1      557            600      838            846      140
## 10  2013     1     1      558            600      753            745      138
## # ℹ 336,766 more rows

What’s in a name?

Calling functions

Use of TAB

continuation character, +

Printing to screen