import data
flights
## # A tibble: 336,776 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ℹ 336,766 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
nycflights13::flights
## # A tibble: 336,776 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ℹ 336,766 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
filter rows
filter(flights, month == 1, day == 1)
## # A tibble: 842 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ℹ 832 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
filter(flights, month == 1 & day == 1)
## # A tibble: 842 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ℹ 832 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
filter(flights, month == 1 | day == 1)
## # A tibble: 37,198 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ℹ 37,188 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
filter(flights, month %in% c(11, 12))
## # A tibble: 55,403 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 11 1 5 2359 6 352 345
## 2 2013 11 1 35 2250 105 123 2356
## 3 2013 11 1 455 500 -5 641 651
## 4 2013 11 1 539 545 -6 856 827
## 5 2013 11 1 542 545 -3 831 855
## 6 2013 11 1 549 600 -11 912 923
## 7 2013 11 1 550 600 -10 705 659
## 8 2013 11 1 554 600 -6 659 701
## 9 2013 11 1 554 600 -6 826 827
## 10 2013 11 1 554 600 -6 749 751
## # ℹ 55,393 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
select columns
select(flights, year:dep_time)
## # A tibble: 336,776 × 4
## year month day dep_time
## <int> <int> <int> <int>
## 1 2013 1 1 517
## 2 2013 1 1 533
## 3 2013 1 1 542
## 4 2013 1 1 544
## 5 2013 1 1 554
## 6 2013 1 1 554
## 7 2013 1 1 555
## 8 2013 1 1 557
## 9 2013 1 1 557
## 10 2013 1 1 558
## # ℹ 336,766 more rows
select(flights, year, month, day, dep_time)
## # A tibble: 336,776 × 4
## year month day dep_time
## <int> <int> <int> <int>
## 1 2013 1 1 517
## 2 2013 1 1 533
## 3 2013 1 1 542
## 4 2013 1 1 544
## 5 2013 1 1 554
## 6 2013 1 1 554
## 7 2013 1 1 555
## 8 2013 1 1 557
## 9 2013 1 1 557
## 10 2013 1 1 558
## # ℹ 336,766 more rows
select(flights, year, month, day, dep_time, dep_delay)
## # A tibble: 336,776 × 5
## year month day dep_time dep_delay
## <int> <int> <int> <int> <dbl>
## 1 2013 1 1 517 2
## 2 2013 1 1 533 4
## 3 2013 1 1 542 2
## 4 2013 1 1 544 -1
## 5 2013 1 1 554 -6
## 6 2013 1 1 554 -4
## 7 2013 1 1 555 -5
## 8 2013 1 1 557 -3
## 9 2013 1 1 557 -3
## 10 2013 1 1 558 -2
## # ℹ 336,766 more rows
select(flights, year, month, day, starts_with("dep"))
## # A tibble: 336,776 × 5
## year month day dep_time dep_delay
## <int> <int> <int> <int> <dbl>
## 1 2013 1 1 517 2
## 2 2013 1 1 533 4
## 3 2013 1 1 542 2
## 4 2013 1 1 544 -1
## 5 2013 1 1 554 -6
## 6 2013 1 1 554 -4
## 7 2013 1 1 555 -5
## 8 2013 1 1 557 -3
## 9 2013 1 1 557 -3
## 10 2013 1 1 558 -2
## # ℹ 336,766 more rows
select(flights, year, month, day, contains("time"))
## # A tibble: 336,776 × 9
## year month day dep_time sched_dep_time arr_time sched_arr_time air_time
## <int> <int> <int> <int> <int> <int> <int> <dbl>
## 1 2013 1 1 517 515 830 819 227
## 2 2013 1 1 533 529 850 830 227
## 3 2013 1 1 542 540 923 850 160
## 4 2013 1 1 544 545 1004 1022 183
## 5 2013 1 1 554 600 812 837 116
## 6 2013 1 1 554 558 740 728 150
## 7 2013 1 1 555 600 913 854 158
## 8 2013 1 1 557 600 709 723 53
## 9 2013 1 1 557 600 838 846 140
## 10 2013 1 1 558 600 753 745 138
## # ℹ 336,766 more rows
## # ℹ 1 more variable: time_hour <dttm>
select(flights, year, month, day, ends_with("time"))
## # A tibble: 336,776 × 8
## year month day dep_time sched_dep_time arr_time sched_arr_time air_time
## <int> <int> <int> <int> <int> <int> <int> <dbl>
## 1 2013 1 1 517 515 830 819 227
## 2 2013 1 1 533 529 850 830 227
## 3 2013 1 1 542 540 923 850 160
## 4 2013 1 1 544 545 1004 1022 183
## 5 2013 1 1 554 600 812 837 116
## 6 2013 1 1 554 558 740 728 150
## 7 2013 1 1 555 600 913 854 158
## 8 2013 1 1 557 600 709 723 53
## 9 2013 1 1 557 600 838 846 140
## 10 2013 1 1 558 600 753 745 138
## # ℹ 336,766 more rows
select(flights, year, month, day, contains("time"), everything())
## # A tibble: 336,776 × 19
## year month day dep_time sched_dep_time arr_time sched_arr_time air_time
## <int> <int> <int> <int> <int> <int> <int> <dbl>
## 1 2013 1 1 517 515 830 819 227
## 2 2013 1 1 533 529 850 830 227
## 3 2013 1 1 542 540 923 850 160
## 4 2013 1 1 544 545 1004 1022 183
## 5 2013 1 1 554 600 812 837 116
## 6 2013 1 1 554 558 740 728 150
## 7 2013 1 1 555 600 913 854 158
## 8 2013 1 1 557 600 709 723 53
## 9 2013 1 1 557 600 838 846 140
## 10 2013 1 1 558 600 753 745 138
## # ℹ 336,766 more rows
## # ℹ 11 more variables: time_hour <dttm>, dep_delay <dbl>, arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # distance <dbl>, hour <dbl>, minute <dbl>
mutate(flights,
gain = dep_delay - arr_delay) %>%
# select, year, month, day, and gain
select(year:day, gain)
## # A tibble: 336,776 × 4
## year month day gain
## <int> <int> <int> <dbl>
## 1 2013 1 1 -9
## 2 2013 1 1 -16
## 3 2013 1 1 -31
## 4 2013 1 1 17
## 5 2013 1 1 19
## 6 2013 1 1 -16
## 7 2013 1 1 -24
## 8 2013 1 1 11
## 9 2013 1 1 5
## 10 2013 1 1 -10
## # ℹ 336,766 more rows
#just keep gain
mutate(flights,
gain = dep_delay - arr_delay) %>%
# select, year, month, day, and gain
select(gain)
## # A tibble: 336,776 × 1
## gain
## <dbl>
## 1 -9
## 2 -16
## 3 -31
## 4 17
## 5 19
## 6 -16
## 7 -24
## 8 11
## 9 5
## 10 -10
## # ℹ 336,766 more rows
#alternative using transmute()
transmute(flights,
gain = dep_delay - arr_delay)
## # A tibble: 336,776 × 1
## gain
## <dbl>
## 1 -9
## 2 -16
## 3 -31
## 4 17
## 5 19
## 6 -16
## 7 -24
## 8 11
## 9 5
## 10 -10
## # ℹ 336,766 more rows
#lag()
select(flights, dep_time) %>%
mutate(dep_time_lag1 = lag(dep_time))
## # A tibble: 336,776 × 2
## dep_time dep_time_lag1
## <int> <int>
## 1 517 NA
## 2 533 517
## 3 542 533
## 4 544 542
## 5 554 544
## 6 554 554
## 7 555 554
## 8 557 555
## 9 557 557
## 10 558 557
## # ℹ 336,766 more rows
#cumsum()
select(flights, minute) %>%
mutate(minute_cumsum = cumsum(minute))
## # A tibble: 336,776 × 2
## minute minute_cumsum
## <dbl> <dbl>
## 1 15 15
## 2 29 44
## 3 40 84
## 4 45 129
## 5 0 129
## 6 58 187
## 7 0 187
## 8 0 187
## 9 0 187
## 10 0 187
## # ℹ 336,766 more rows
1000 / 10
## [1] 100
x <- 1000 / 10
x
## [1] 100