library(pacman)
p_load(nycflights13)
#View(flights)
# Print per-column summary statistics for the `flights` data set.
# The `##` lines that follow are the recorded output of this call.
summary(flights)
##       year          month             day           dep_time    sched_dep_time
##  Min.   :2013   Min.   : 1.000   Min.   : 1.00   Min.   :   1   Min.   : 106  
##  1st Qu.:2013   1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.: 907   1st Qu.: 906  
##  Median :2013   Median : 7.000   Median :16.00   Median :1401   Median :1359  
##  Mean   :2013   Mean   : 6.549   Mean   :15.71   Mean   :1349   Mean   :1344  
##  3rd Qu.:2013   3rd Qu.:10.000   3rd Qu.:23.00   3rd Qu.:1744   3rd Qu.:1729  
##  Max.   :2013   Max.   :12.000   Max.   :31.00   Max.   :2400   Max.   :2359  
##                                                  NA's   :8255                 
##    dep_delay          arr_time    sched_arr_time   arr_delay       
##  Min.   : -43.00   Min.   :   1   Min.   :   1   Min.   : -86.000  
##  1st Qu.:  -5.00   1st Qu.:1104   1st Qu.:1124   1st Qu.: -17.000  
##  Median :  -2.00   Median :1535   Median :1556   Median :  -5.000  
##  Mean   :  12.64   Mean   :1502   Mean   :1536   Mean   :   6.895  
##  3rd Qu.:  11.00   3rd Qu.:1940   3rd Qu.:1945   3rd Qu.:  14.000  
##  Max.   :1301.00   Max.   :2400   Max.   :2359   Max.   :1272.000  
##  NA's   :8255      NA's   :8713                  NA's   :9430      
##    carrier              flight       tailnum             origin         
##  Length:336776      Min.   :   1   Length:336776      Length:336776     
##  Class :character   1st Qu.: 553   Class :character   Class :character  
##  Mode  :character   Median :1496   Mode  :character   Mode  :character  
##                     Mean   :1972                                        
##                     3rd Qu.:3465                                        
##                     Max.   :8500                                        
##                                                                         
##      dest              air_time        distance         hour      
##  Length:336776      Min.   : 20.0   Min.   :  17   Min.   : 1.00  
##  Class :character   1st Qu.: 82.0   1st Qu.: 502   1st Qu.: 9.00  
##  Mode  :character   Median :129.0   Median : 872   Median :13.00  
##                     Mean   :150.7   Mean   :1040   Mean   :13.18  
##                     3rd Qu.:192.0   3rd Qu.:1389   3rd Qu.:17.00  
##                     Max.   :695.0   Max.   :4983   Max.   :23.00  
##                     NA's   :9430                                  
##      minute        time_hour                     
##  Min.   : 0.00   Min.   :2013-01-01 05:00:00.00  
##  1st Qu.: 8.00   1st Qu.:2013-04-04 13:00:00.00  
##  Median :29.00   Median :2013-07-03 10:00:00.00  
##  Mean   :26.23   Mean   :2013-07-03 05:22:54.64  
##  3rd Qu.:44.00   3rd Qu.:2013-10-01 07:00:00.00  
##  Max.   :59.00   Max.   :2013-12-31 23:00:00.00  
## 
#summarise(flights, delay=mean(dep_delay,na.rm=TRUE))
#2
# Largest departure delay in the data; missing delays (NA) are ignored.
maxdep <- max(flights[["dep_delay"]], na.rm = TRUE)

# Row position(s) whose delay equals that maximum. which() keeps only the
# TRUE comparisons, so rows with an NA delay are dropped automatically.
maxdep_id <- which(flights[["dep_delay"]] == maxdep)

# Show the carrier, flight number and tail number (columns 10 to 12 of
# flights) for the matching row(s).
flights[maxdep_id, c("carrier", "flight", "tailnum")]
## # A tibble: 1 × 3
##   carrier flight tailnum
##   <chr>    <int> <chr>  
## 1 HA          51 N384HA
#sortf <- arrange(flights,desc(dep_delay)) 

#select(sortf, carrier, flight, tailnum, everything())
#2
#select(flights, starts_with("dep"))
#2
#not_cancelled <- flights %>% 
 #filter(!is.na(dep_delay))

#not_cancelled %>% 
 #group_by(year, month, day) %>% 
 #summarise(mean = mean(dep_delay))
#3
#flights %>% 
 #group_by(year, month, day) %>% 
 #summarise(mean = mean(dep_delay, na.rm = TRUE))
#3
#not_cancelled <- flights %>% 
 #filter(!is.na(dep_delay), !is.na(arr_delay))
#4
#not_cancelled <- flights %>% 
 #filter(!is.na(dep_delay))

#lowest_arr_delay <- not_cancelled %>%
  #group_by(tailnum) %>%
  #summarise(mean_arr_delay = mean(arr_delay, na.rm = TRUE)) %>%
  #slice_min(mean_arr_delay, n = 1)
#print(lowest_arr_delay)
#5
#not_cancelled %>% 
 #group_by(year, month, day) %>% 
 #summarise(
  #first = min(dep_time),
  #last = max(dep_time)
 #)
#6
#monthly_delay_proportion <- flights %>%
  #group_by(month) %>%
  #summarise(
    #total_flights = n(), 
    #delayed_flights = sum(dep_delay > 60, na.rm = TRUE),
    #proportion_delayed = delayed_flights / total_flights  
  #)
#print(monthly_delay_proportion)
#7
#dest_carriers <- flights %>%
  #group_by(dest) %>%
  #summarise(num_carriers = n_distinct(carrier)) %>%
  #arrange(desc(num_carriers))


#max_carriers <- max(dest_carriers$num_carriers)
#most_carriers_dest <- dest_carriers %>%
  #filter(num_carriers == max_carriers)

#print(most_carriers_dest)
#9
#delays <- flights %>% 
 #group_by(dest) %>% 
 #summarise(
  #count = n(),
  #dist = mean(distance, na.rm = TRUE),
  #delay = mean(arr_delay, na.rm = TRUE)
 #) %>% 
 #filter(count > 20, dest != "HNL")