#install.packages("nycflights13")
library(nycflights13)
## Warning: package 'nycflights13' was built under R version 4.0.5
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Quick look at the size and column layout of the flights table.
# Number of rows (one row per flight):
flights %>% nrow()
## [1] 336776
# Number of columns:
flights %>% ncol()
## [1] 19
# Column names, types and the first few values of each column:
flights %>% str()
## tibble [336,776 x 19] (S3: tbl_df/tbl/data.frame)
## $ year : int [1:336776] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
## $ month : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
## $ day : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
## $ dep_time : int [1:336776] 517 533 542 544 554 554 555 557 557 558 ...
## $ sched_dep_time: int [1:336776] 515 529 540 545 600 558 600 600 600 600 ...
## $ dep_delay : num [1:336776] 2 4 2 -1 -6 -4 -5 -3 -3 -2 ...
## $ arr_time : int [1:336776] 830 850 923 1004 812 740 913 709 838 753 ...
## $ sched_arr_time: int [1:336776] 819 830 850 1022 837 728 854 723 846 745 ...
## $ arr_delay : num [1:336776] 11 20 33 -18 -25 12 19 -14 -8 8 ...
## $ carrier : chr [1:336776] "UA" "UA" "AA" "B6" ...
## $ flight : int [1:336776] 1545 1714 1141 725 461 1696 507 5708 79 301 ...
## $ tailnum : chr [1:336776] "N14228" "N24211" "N619AA" "N804JB" ...
## $ origin : chr [1:336776] "EWR" "LGA" "JFK" "JFK" ...
## $ dest : chr [1:336776] "IAH" "IAH" "MIA" "BQN" ...
## $ air_time : num [1:336776] 227 227 160 183 116 150 158 53 140 138 ...
## $ distance : num [1:336776] 1400 1416 1089 1576 762 ...
## $ hour : num [1:336776] 5 5 5 5 6 5 6 6 6 6 ...
## $ minute : num [1:336776] 15 29 40 45 0 58 0 0 0 0 ...
## $ time_hour : POSIXct[1:336776], format: "2013-01-01 05:00:00" "2013-01-01 05:00:00" ...
Use the wrangling verbs filter(), count() and summarise() on the flights data to answer the following: What is the average number of flights that United Airlines (UA) flies out of JFK between 8:00 am and 9:00 am by day of the month?
# Average number of UA departures from JFK per day of the month, keeping only
# flights whose actual departure time falls in the 800-900 window (inclusive
# on both ends; dep_time appears to be stored as an HHMM integer).
flights %>%
  filter(
    carrier == "UA" & origin == "JFK",
    between(dep_time, 800, 900)
  ) %>%
  # Total matching flights divided by the 31 possible day-of-month values.
  summarise(mean = n() / 31)
## # A tibble: 1 x 1
## mean
## <dbl>
## 1 19
# Same question answered via count(): tally the matching flights for each day
# of the month first, then average those per-day tallies.
flights %>%
  filter(
    carrier == "UA" & origin == "JFK",
    between(dep_time, 800, 900)
  ) %>%
  # One row per day of the month, with the tally in column `n`.
  count(day) %>%
  # Dividing by 31 (rather than by the number of rows) means a day of the
  # month with no matching flights would still count as a zero.
  summarise(mean = sum(n) / 31)
## # A tibble: 1 x 1
## mean
## <dbl>
## 1 19
Use the wrangling verbs group_by(), summarise() and arrange() to find the plane that has the highest total arrival delay (in minutes).
# Find the single plane (identified by tail number) with the largest total
# arrival delay, in minutes, summed over all of its flights.
flights %>%
  # Flights without a recorded tail number cannot be attributed to a plane.
  # Left in, they would be pooled into one NA "plane" that competes for the
  # top rank, so drop them before grouping.
  filter(!is.na(tailnum)) %>%
  group_by(tailnum) %>%
  # na.rm = TRUE skips flights that have no recorded arrival delay.
  summarise(total_delays = sum(arr_delay, na.rm = TRUE)) %>%
  # Largest total first, then keep only the top row.
  arrange(desc(total_delays)) %>%
  slice(1)
## # A tibble: 1 x 2
## tailnum total_delays
## <chr> <dbl>
## 1 N15910 7317
# Rank every tail number by its total arrival delay (minutes), worst first.
flights %>%
  group_by(tailnum) %>%
  # Flights with a missing arrival delay are ignored in the sum.
  summarise(total_delays = sum(arr_delay, na.rm = TRUE)) %>%
  # Negating the numeric total sorts in descending order.
  arrange(-total_delays)
## # A tibble: 4,044 x 2
## tailnum total_delays
## <chr> <dbl>
## 1 N15910 7317
## 2 N15980 7134
## 3 N16919 6904
## 4 N228JB 6778
## 5 N14998 6087
## 6 N192JB 5810
## 7 N292JB 5804
## 8 N12921 5788
## 9 N13958 5620
## 10 N10575 5566
## # ... with 4,034 more rows