library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
library(nycflights13)
library(tibble)
# Coerce the nycflights13 `flights` data set to a tibble so it prints compactly.
# `tbl_df()` is deprecated in dplyr; `as_tibble()` is the supported replacement
# (available here because the tibble package is loaded above).
flights.tbl <- as_tibble(flights)
flights.tbl
## # A tibble: 336,776 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 542 540 2 923
## 4 2013 1 1 544 545 -1 1004
## 5 2013 1 1 554 600 -6 812
## 6 2013 1 1 554 558 -4 740
## 7 2013 1 1 555 600 -5 913
## 8 2013 1 1 557 600 -3 709
## 9 2013 1 1 557 600 -3 838
## 10 2013 1 1 558 600 -2 753
## # ... with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Keep only rows with origin "JFK", carrier "UA" and month 3.
new.flights <- flights.tbl %>%
  filter(origin == "JFK" & carrier == "UA" & month == 3)
new.flights
## # A tibble: 378 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 3 1 600 600 0 906
## 2 2013 3 1 607 610 -3 832
## 3 2013 3 1 655 700 -5 954
## 4 2013 3 1 758 800 -2 1106
## 5 2013 3 1 836 840 -4 1111
## 6 2013 3 1 1103 1106 -3 1400
## 7 2013 3 1 1125 1130 -5 1350
## 8 2013 3 1 1423 1425 -2 1728
## 9 2013 3 1 1621 1530 51 1844
## 10 2013 3 1 1723 1729 -6 2010
## # ... with 368 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Narrow the filtered flights down to the five columns used below.
newer.flights <- new.flights %>%
  select(origin, carrier, month, air_time, distance)
newer.flights
## # A tibble: 378 x 5
## origin carrier month air_time distance
## <chr> <chr> <int> <dbl> <dbl>
## 1 JFK UA 3 342 2586
## 2 JFK UA 3 292 2475
## 3 JFK UA 3 343 2586
## 4 JFK UA 3 342 2586
## 5 JFK UA 3 301 2475
## 6 JFK UA 3 338 2586
## 7 JFK UA 3 307 2475
## 8 JFK UA 3 337 2586
## 9 JFK UA 3 300 2475
## 10 JFK UA 3 320 2586
## # ... with 368 more rows
# Smallest air_time among the selected flights, ignoring missing values.
min.air_time <- min(newer.flights[["air_time"]], na.rm = TRUE)
min.air_time
## [1] 281
# Largest air_time among the selected flights, ignoring missing values.
max.air_time <- max(newer.flights[["air_time"]], na.rm = TRUE)
max.air_time
## [1] 394
# Average air_time (missing values dropped), then rounded to two decimals.
mean.air_time <- mean(newer.flights[["air_time"]], na.rm = TRUE)
round.mean <- round(mean.air_time, 2)
round.mean
## [1] 342.93
# Average distance of the selected flights, ignoring missing values.
mean.distance <- mean(newer.flights[["distance"]], na.rm = TRUE)
mean.distance
## [1] 2534.317
# Same average distance, rounded to two decimal places for reporting.
round.mean.distance <- round(mean.distance, 2)
round.mean.distance
## [1] 2534.32
# Flights in month 6 whose departure delay is strictly positive.
departure.delays <- flights.tbl %>%
  filter(dep_delay > 0 & month == 6)
departure.delays
## # A tibble: 12,655 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 6 1 2 2359 3 341
## 2 2013 6 1 607 600 7 853
## 3 2013 6 1 614 605 9 844
## 4 2013 6 1 614 600 14 829
## 5 2013 6 1 615 610 5 837
## 6 2013 6 1 624 600 24 727
## 7 2013 6 1 632 630 2 738
## 8 2013 6 1 638 635 3 855
## 9 2013 6 1 638 630 8 741
## 10 2013 6 1 644 642 2 824
## # ... with 12,645 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Keep just the month, the delay and the origin airport of each delayed flight.
new.departure.delays <- departure.delays %>%
  select(month, dep_delay, origin)
new.departure.delays
## # A tibble: 12,655 x 3
## month dep_delay origin
## <int> <dbl> <chr>
## 1 6 3 JFK
## 2 6 7 EWR
## 3 6 9 EWR
## 4 6 14 EWR
## 5 6 5 JFK
## 6 6 24 EWR
## 7 6 2 EWR
## 8 6 3 JFK
## 9 6 8 EWR
## 10 6 2 EWR
## # ... with 12,645 more rows
# Order the delayed flights by origin airport.
newest.departure.delays <- arrange(new.departure.delays, origin)
newest.departure.delays
## # A tibble: 12,655 x 3
## month dep_delay origin
## <int> <dbl> <chr>
## 1 6 7 EWR
## 2 6 9 EWR
## 3 6 14 EWR
## 4 6 24 EWR
## 5 6 2 EWR
## 6 6 8 EWR
## 7 6 2 EWR
## 8 6 21 EWR
## 9 6 36 EWR
## 10 6 1 EWR
## # ... with 12,645 more rows
# Per-origin minimum, maximum and mean departure delay.
summarise.newest.departure.delays <- summarise(
  group_by(newest.departure.delays, origin),
  min.dep_delay = min(dep_delay),
  max.dep_delay = max(dep_delay),
  mean.dep_delay = mean(dep_delay)
)
summarise.newest.departure.delays
## # A tibble: 3 x 4
## origin min.dep_delay max.dep_delay mean.dep_delay
## <chr> <dbl> <dbl> <dbl>
## 1 EWR 1 502 47.92212
## 2 JFK 1 1137 47.98522
## 3 LGA 1 803 54.96745
# Columns needed for the speed calculation and the filters applied to it.
flights.mph <- flights.tbl %>%
  select(distance, air_time, dest, origin, month, carrier)
flights.mph
## # A tibble: 336,776 x 6
## distance air_time dest origin month carrier
## <dbl> <dbl> <chr> <chr> <int> <chr>
## 1 1400 227 IAH EWR 1 UA
## 2 1416 227 IAH LGA 1 UA
## 3 1089 160 MIA JFK 1 AA
## 4 1576 183 BQN JFK 1 B6
## 5 762 116 ATL LGA 1 DL
## 6 719 150 ORD EWR 1 UA
## 7 1065 158 FLL EWR 1 B6
## 8 229 53 IAD LGA 1 EV
## 9 944 140 MCO JFK 1 B6
## 10 733 138 ORD LGA 1 AA
## # ... with 336,766 more rows
# Flights to "ORD" in months 6, 7 or 8 operated by carrier "AA" or "UA".
new.flights.mph <- flights.mph %>%
  filter(
    month %in% c(6, 7, 8) &
      carrier %in% c("AA", "UA") &
      dest == "ORD"
  )
new.flights.mph
## # A tibble: 3,500 x 6
## distance air_time dest origin month carrier
## <dbl> <dbl> <chr> <chr> <int> <chr>
## 1 733 108 ORD LGA 6 AA
## 2 733 109 ORD LGA 6 UA
## 3 733 110 ORD LGA 6 AA
## 4 719 108 ORD EWR 6 UA
## 5 733 109 ORD LGA 6 AA
## 6 733 108 ORD LGA 6 UA
## 7 733 112 ORD LGA 6 AA
## 8 733 105 ORD LGA 6 AA
## 9 719 110 ORD EWR 6 UA
## 10 733 109 ORD LGA 6 AA
## # ... with 3,490 more rows
# Derive `hour` (air_time divided by 60) and `mph` (distance divided by hour).
# Within one mutate() call, later columns can use ones created earlier in it.
newer.flights.mph <- new.flights.mph %>%
  select(air_time, distance) %>%
  mutate(
    hour = air_time / 60,
    mph = distance / hour
  )
newer.flights.mph
## # A tibble: 3,500 x 4
## air_time distance hour mph
## <dbl> <dbl> <dbl> <dbl>
## 1 108 733 1.800000 407.2222
## 2 109 733 1.816667 403.4862
## 3 110 733 1.833333 399.8182
## 4 108 719 1.800000 399.4444
## 5 109 733 1.816667 403.4862
## 6 108 733 1.800000 407.2222
## 7 112 733 1.866667 392.6786
## 8 105 733 1.750000 418.8571
## 9 110 719 1.833333 392.1818
## 10 109 733 1.816667 403.4862
## # ... with 3,490 more rows
# Overall minimum, maximum and mean of `mph`, skipping missing values.
summarise.newer.flights.mph <- summarise(
  newer.flights.mph,
  min.mph = min(mph, na.rm = TRUE),
  max.mph = max(mph, na.rm = TRUE),
  mean.mph = mean(mph, na.rm = TRUE)
)
summarise.newer.flights.mph
## # A tibble: 1 x 3
## min.mph max.mph mean.mph
## <dbl> <dbl> <dbl>
## 1 231.4737 495.8621 396.5095