# install.packages("pacman")
pacman::p_load(nycflights13)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Question #1 ----

#view(flights)
#glimpse(flights) 

#summary(flights)

# Question #2: Flight with the largest departure delay ----

#select(flights, starts_with("dep"))

#summarise(flights, delay=mean(dep_delay,na.rm=TRUE))

# Question #3 ----

#flights %>% group_by(year, month, day) %>% 
#summarise(mean = mean(dep_delay))
#not_cancelled <- flights %>% 

  #filter(!is.na(dep_delay), !is.na(arr_delay))
#delays <- not_cancelled %>% 
# group_by(tailnum) %>% 
#summarise(

  #delay = mean(arr_delay) )
#not_cancelled <- flights %>% 
# filter(!is.na(dep_delay))

#not_cancelled %>% 
#group_by(year, month, day) %>% 
 #summarise(mean = mean(dep_delay))

# Question #4 ----

# Average arrival delay per aircraft (one row per `tailnum`).
# Missing `arr_delay` values are dropped before averaging, so a tail number
# with no recorded arrival delays ends up with NaN.
delays <- flights %>%
  group_by(tailnum) %>%
  summarise(avg_arr_delay = mean(arr_delay, na.rm = TRUE))

# Show the single tail number with the smallest average arrival delay.
# NOTE(review): ascending order returns the lowest (most negative) average;
# if the question asks for the worst record, sort with desc() -- confirm.
head(arrange(delays, avg_arr_delay), n = 1)
## # A tibble: 1 × 2
##   tailnum avg_arr_delay
##   <chr>           <dbl>
## 1 N560AS            -53

# Question #5 ----

#not_cancelled %>% 
#group_by(year, month, day) %>% 
#summarise( first = min(dep_time),
#last = max(dep_time))

# Question #6 ----

# Per-month proportion of flights whose departure delay is greater than 60
# (a delay of more than one hour), listed from the highest proportion down.
# Flights with a missing `dep_delay` are excluded from the proportion.
flights %>%
  group_by(month) %>%
  summarise(
    # The mean of a logical vector is the share of TRUE values.
    prop = mean(dep_delay > 60, na.rm = TRUE)
  ) %>%
  arrange(desc(prop))
## # A tibble: 12 × 2
##    month   prop
##    <int>  <dbl>
##  1     7 0.134 
##  2     6 0.128 
##  3    12 0.0942
##  4     4 0.0916
##  5     3 0.0837
##  6     5 0.0818
##  7     8 0.0796
##  8     2 0.0698
##  9     1 0.0688
## 10     9 0.0490
## 11    10 0.0469
## 12    11 0.0402

# Question #7 ----

# Number of different carriers serving each destination, with the
# destinations served by the most carriers listed first.
flights %>%
  # Keep one row per (destination, carrier) pair ...
  distinct(dest, carrier) %>%
  # ... so counting rows per destination counts its distinct carriers.
  count(dest, name = "carrier_count") %>%
  arrange(desc(carrier_count))
## # A tibble: 105 × 2
##    dest  carrier_count
##    <chr>         <int>
##  1 ATL               7
##  2 BOS               7
##  3 CLT               7
##  4 ORD               7
##  5 TPA               7
##  6 AUS               6
##  7 DCA               6
##  8 DTW               6
##  9 IAD               6
## 10 MSP               6
## # ℹ 95 more rows

# Question #9 ----

# Per-destination summary: number of flights, mean distance and mean arrival
# delay (missing values ignored in both means).
# Note: this reuses the name `delays`, replacing any earlier object so named.
delays <- flights %>%
  group_by(dest) %>%
  summarise(
    count = n(),
    dist = mean(distance, na.rm = TRUE),
    delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  # Keep destinations with more than 20 flights, and drop HNL.
  filter(count > 20 & dest != "HNL")