library(dplyr)
library(magrittr)
library(nycflights13)
library(tibble)
# Hold the nycflights13 `flights` data as a tibble. `tbl_df()` is deprecated
# in dplyr (>= 1.0.0); `as_tibble()` is the supported replacement and returns
# the same tibble here.
flights.tbl <- as_tibble(flights)
flights.tbl
# A tibble: 336,776 x 19
year month day dep_time sched_dep_time dep_delay arr_time
<int> <int> <int> <int> <int> <dbl> <int>
1 2013 1 1 517 515 2 830
2 2013 1 1 533 529 4 850
3 2013 1 1 542 540 2 923
4 2013 1 1 544 545 -1 1004
5 2013 1 1 554 600 -6 812
6 2013 1 1 554 558 -4 740
7 2013 1 1 555 600 -5 913
8 2013 1 1 557 600 -3 709
9 2013 1 1 557 600 -3 838
10 2013 1 1 558 600 -2 753
# ... with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
# arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
# origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
# minute <dbl>, time_hour <dttm>
# Subset to rows with origin "JFK", carrier "UA" and month 3; the
# comma-separated conditions are combined with AND.
new.flights <- flights.tbl %>%
  filter(origin == "JFK", carrier == "UA", month == 3)
new.flights
# A tibble: 378 x 19
year month day dep_time sched_dep_time dep_delay arr_time
<int> <int> <int> <int> <int> <dbl> <int>
1 2013 3 1 600 600 0 906
2 2013 3 1 607 610 -3 832
3 2013 3 1 655 700 -5 954
4 2013 3 1 758 800 -2 1106
5 2013 3 1 836 840 -4 1111
6 2013 3 1 1103 1106 -3 1400
7 2013 3 1 1125 1130 -5 1350
8 2013 3 1 1423 1425 -2 1728
9 2013 3 1 1621 1530 51 1844
10 2013 3 1 1723 1729 -6 2010
# ... with 368 more rows, and 12 more variables: sched_arr_time <int>,
# arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
# origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
# minute <dbl>, time_hour <dttm>
# Narrow the subset to the five columns listed, in this order.
newer.flights <- new.flights %>%
  select(origin, carrier, month, air_time, distance)
newer.flights
# A tibble: 378 x 5
origin carrier month air_time distance
<chr> <chr> <int> <dbl> <dbl>
1 JFK UA 3 342 2586
2 JFK UA 3 292 2475
3 JFK UA 3 343 2586
4 JFK UA 3 342 2586
5 JFK UA 3 301 2475
6 JFK UA 3 338 2586
7 JFK UA 3 307 2475
8 JFK UA 3 337 2586
9 JFK UA 3 300 2475
10 JFK UA 3 320 2586
# ... with 368 more rows
# Smallest air_time in the subset; na.rm = TRUE skips missing values.
min.air_time <- min(newer.flights[["air_time"]], na.rm = TRUE)
min.air_time
[1] 281
# Largest air_time in the subset; na.rm = TRUE skips missing values.
max.air_time <- max(newer.flights[["air_time"]], na.rm = TRUE)
max.air_time
[1] 394
# Mean air_time with missing values dropped, plus a copy rounded to two
# decimal places for display.
mean.air_time <- newer.flights %>%
  pull(air_time) %>%
  mean(na.rm = TRUE)
round.mean.air_time <- round(mean.air_time, 2)
round.mean.air_time
[1] 342.93
# Mean distance with missing values dropped, plus a copy rounded to two
# decimal places for display.
mean.distance <- newer.flights %>%
  pull(distance) %>%
  mean(na.rm = TRUE)
round.mean.distance <- round(mean.distance, 2)
round.mean.distance
[1] 2534.32
# Rows from month 6 with a positive dep_delay. filter() keeps only rows where
# the condition is TRUE, so rows with a missing dep_delay are dropped too.
departure.delays <- flights.tbl %>%
  filter(dep_delay > 0, month == 6)
departure.delays
# A tibble: 12,655 x 19
year month day dep_time sched_dep_time dep_delay arr_time
<int> <int> <int> <int> <int> <dbl> <int>
1 2013 6 1 2 2359 3 341
2 2013 6 1 607 600 7 853
3 2013 6 1 614 605 9 844
4 2013 6 1 614 600 14 829
5 2013 6 1 615 610 5 837
6 2013 6 1 624 600 24 727
7 2013 6 1 632 630 2 738
8 2013 6 1 638 635 3 855
9 2013 6 1 638 630 8 741
10 2013 6 1 644 642 2 824
# ... with 12,645 more rows, and 12 more variables: sched_arr_time <int>,
# arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
# origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
# minute <dbl>, time_hour <dttm>
# Keep just the delay, month and origin columns.
new.departure.delays <- departure.delays %>%
  select(dep_delay, month, origin)
new.departure.delays
# A tibble: 12,655 x 3
dep_delay month origin
<dbl> <int> <chr>
1 3 6 JFK
2 7 6 EWR
3 9 6 EWR
4 14 6 EWR
5 5 6 JFK
6 24 6 EWR
7 2 6 EWR
8 3 6 JFK
9 8 6 EWR
10 2 6 EWR
# ... with 12,645 more rows
# Sort the delayed flights by origin (ascending).
arrange.new.departure.delays <- arrange(new.departure.delays, origin)
arrange.new.departure.delays
# A tibble: 12,655 x 3
dep_delay month origin
<dbl> <int> <chr>
1 7 6 EWR
2 9 6 EWR
3 14 6 EWR
4 24 6 EWR
5 2 6 EWR
6 8 6 EWR
7 2 6 EWR
8 21 6 EWR
9 36 6 EWR
10 1 6 EWR
# ... with 12,645 more rows
# One row per origin with the minimum, maximum and mean dep_delay.
summarise.arrange.new.departure.delays <- summarise(
  group_by(arrange.new.departure.delays, origin),
  min.dep_delay = min(dep_delay),
  max.dep_delay = max(dep_delay),
  mean.dep_delay = mean(dep_delay)
)
summarise.arrange.new.departure.delays
# A tibble: 3 x 4
origin min.dep_delay max.dep_delay mean.dep_delay
<chr> <dbl> <dbl> <dbl>
1 EWR 1 502 47.92212
2 JFK 1 1137 47.98522
3 LGA 1 803 54.96745
# Start the speed analysis from the full table, keeping six columns.
flights.mph <- flights.tbl %>%
  select(origin, carrier, dest, distance, air_time, month)
flights.mph
# A tibble: 336,776 x 6
origin carrier dest distance air_time month
<chr> <chr> <chr> <dbl> <dbl> <int>
1 EWR UA IAH 1400 227 1
2 LGA UA IAH 1416 227 1
3 JFK AA MIA 1089 160 1
4 JFK B6 BQN 1576 183 1
5 LGA DL ATL 762 116 1
6 EWR UA ORD 719 150 1
7 EWR B6 FLL 1065 158 1
8 LGA EV IAD 229 53 1
9 JFK B6 MCO 944 140 1
10 LGA AA ORD 733 138 1
# ... with 336,766 more rows
# Rows for months 6-8, carriers "AA" or "UA", and destination "ORD".
new.flights.mph <- flights.mph %>%
  filter(
    month %in% c(6, 7, 8),
    carrier %in% c("AA", "UA"),
    dest == "ORD"
  )
new.flights.mph
# A tibble: 3,500 x 6
origin carrier dest distance air_time month
<chr> <chr> <chr> <dbl> <dbl> <int>
1 LGA AA ORD 733 108 6
2 LGA UA ORD 733 109 6
3 LGA AA ORD 733 110 6
4 EWR UA ORD 719 108 6
5 LGA AA ORD 733 109 6
6 LGA UA ORD 733 108 6
7 LGA AA ORD 733 112 6
8 LGA AA ORD 733 105 6
9 EWR UA ORD 719 110 6
10 LGA AA ORD 733 109 6
# ... with 3,490 more rows
# Derive a speed column: hour = air_time / 60, then mph = distance / hour.
# The names imply air_time is in minutes and distance in miles; confirm
# against the nycflights13 documentation. A single mutate() suffices because
# later expressions can use columns created earlier in the same call.
newer.flights.mph <- new.flights.mph %>%
  select(air_time, distance) %>%
  mutate(
    hour = air_time / 60,
    mph = distance / hour
  )
newer.flights.mph
# A tibble: 3,500 x 4
air_time distance hour mph
<dbl> <dbl> <dbl> <dbl>
1 108 733 1.800000 407.2222
2 109 733 1.816667 403.4862
3 110 733 1.833333 399.8182
4 108 719 1.800000 399.4444
5 109 733 1.816667 403.4862
6 108 733 1.800000 407.2222
7 112 733 1.866667 392.6786
8 105 733 1.750000 418.8571
9 110 719 1.833333 392.1818
10 109 733 1.816667 403.4862
# ... with 3,490 more rows
# Collapse to a single row with the minimum, maximum and mean mph; rows whose
# mph is missing are ignored via na.rm = TRUE.
summarise.newer.flights.mph <- summarise(
  newer.flights.mph,
  min.mph = min(mph, na.rm = TRUE),
  max.mph = max(mph, na.rm = TRUE),
  mean.mph = mean(mph, na.rm = TRUE)
)
summarise.newer.flights.mph
# A tibble: 1 x 3
min.mph max.mph mean.mph
<dbl> <dbl> <dbl>
1 231.4737 495.8621 396.5095