Question 1

# Attach the flights data set and the tidyverse verbs used below.
# library() already attaches nycflights13, so the former
# pacman::p_load(nycflights13) call was redundant and only added a hard
# dependency on the pacman package being installed.
library(nycflights13)
library(tidyverse)

# Per-column overview of flights: quantiles and NA counts for numeric columns,
# length and class for character columns.
summary(flights)
      year          month             day           dep_time    sched_dep_time
 Min.   :2013   Min.   : 1.000   Min.   : 1.00   Min.   :   1   Min.   : 106  
 1st Qu.:2013   1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.: 907   1st Qu.: 906  
 Median :2013   Median : 7.000   Median :16.00   Median :1401   Median :1359  
 Mean   :2013   Mean   : 6.549   Mean   :15.71   Mean   :1349   Mean   :1344  
 3rd Qu.:2013   3rd Qu.:10.000   3rd Qu.:23.00   3rd Qu.:1744   3rd Qu.:1729  
 Max.   :2013   Max.   :12.000   Max.   :31.00   Max.   :2400   Max.   :2359  
                                                 NA's   :8255                 
   dep_delay          arr_time    sched_arr_time   arr_delay          carrier         
 Min.   : -43.00   Min.   :   1   Min.   :   1   Min.   : -86.000   Length:336776     
 1st Qu.:  -5.00   1st Qu.:1104   1st Qu.:1124   1st Qu.: -17.000   Class :character  
 Median :  -2.00   Median :1535   Median :1556   Median :  -5.000   Mode  :character  
 Mean   :  12.64   Mean   :1502   Mean   :1536   Mean   :   6.895                     
 3rd Qu.:  11.00   3rd Qu.:1940   3rd Qu.:1945   3rd Qu.:  14.000                     
 Max.   :1301.00   Max.   :2400   Max.   :2359   Max.   :1272.000                     
 NA's   :8255      NA's   :8713                  NA's   :9430                         
     flight       tailnum             origin              dest              air_time    
 Min.   :   1   Length:336776      Length:336776      Length:336776      Min.   : 20.0  
 1st Qu.: 553   Class :character   Class :character   Class :character   1st Qu.: 82.0  
 Median :1496   Mode  :character   Mode  :character   Mode  :character   Median :129.0  
 Mean   :1972                                                            Mean   :150.7  
 3rd Qu.:3465                                                            3rd Qu.:192.0  
 Max.   :8500                                                            Max.   :695.0  
                                                                         NA's   :9430   
    distance         hour           minute        time_hour                     
 Min.   :  17   Min.   : 1.00   Min.   : 0.00   Min.   :2013-01-01 05:00:00.00  
 1st Qu.: 502   1st Qu.: 9.00   1st Qu.: 8.00   1st Qu.:2013-04-04 13:00:00.00  
 Median : 872   Median :13.00   Median :29.00   Median :2013-07-03 10:00:00.00  
 Mean   :1040   Mean   :13.18   Mean   :26.23   Mean   :2013-07-03 05:22:54.64  
 3rd Qu.:1389   3rd Qu.:17.00   3rd Qu.:44.00   3rd Qu.:2013-10-01 07:00:00.00  
 Max.   :4983   Max.   :23.00   Max.   :59.00   Max.   :2013-12-31 23:00:00.00  
                                                                                
# Open the data viewer only when a person is at the console; in a
# non-interactive run (e.g. Rscript or knitting) there is nothing to show the
# viewer in, so the call is skipped instead of risking an error.
if (interactive()) {
  View(flights)
}

Question 2



# Largest departure delay in the data, ignoring rows with a missing dep_delay.
maxdep <- max(flights$dep_delay, na.rm = TRUE)

# Row position(s) of the flight(s) that reach that maximum. which() discards
# the NA comparisons produced by missing dep_delay values.
maxdep_id <- which(flights$dep_delay == maxdep)

# Identify those flights by column name rather than by the positions 10:12,
# so the lookup keeps returning carrier/flight/tailnum even if the column
# order of `flights` changes.
flights[maxdep_id, c("carrier", "flight", "tailnum")]

# Keep only flights that have both a departure delay and an arrival delay
# recorded; a row missing either value is dropped.
not_cancelled <- filter(
  flights,
  !(is.na(dep_delay) | is.na(arr_delay))
)


# Show every column of flights whose name begins with "dep".
flights %>%
  select(starts_with("dep"))

# Order flights from the largest departure delay downwards.
sortf <- flights %>%
  arrange(desc(dep_delay))

# Put the identifying columns first; everything() appends the remaining
# columns in their existing order.
sortf %>%
  select(carrier, flight, tailnum, everything())
# Smallest and largest dep_time value for each (year, month, day) among the
# not_cancelled flights. The result stays grouped by year and month, which is
# why summarise() prints its `.groups` message.
summarise(
  group_by(not_cancelled, year, month, day),
  first = min(dep_time),
  last = max(dep_time)
)
`summarise()` has grouped output by 'year', 'month'. You can override using the `.groups`
argument.

Question 3

# Rebuild not_cancelled keeping every flight with a recorded dep_delay
# (arr_delay is not checked in this version).
not_cancelled <- filter(flights, !is.na(dep_delay))

# Mean departure delay per calendar day. No na.rm is needed here because the
# missing dep_delay values were removed by the filter above.
summarise(
  group_by(not_cancelled, year, month, day),
  mean = mean(dep_delay)
)

# Rebuild not_cancelled so that it again requires both delays to be present:
# first drop rows without a dep_delay, then rows without an arr_delay.
not_cancelled <- flights %>%
  filter(!is.na(dep_delay)) %>%
  filter(!is.na(arr_delay))

# Mean arrival delay for each tail number among the not_cancelled flights.
delays <- summarise(
  group_by(not_cancelled, tailnum),
  delay = mean(arr_delay)
)

# Mean departure delay per calendar day computed straight from flights;
# na.rm = TRUE skips missing dep_delay values instead of filtering them out
# beforehand.
summarise(
  group_by(flights, year, month, day),
  mean = mean(dep_delay, na.rm = TRUE)
)
`summarise()` has grouped output by 'year', 'month'. You can override using the `.groups` argument.
# Display flights with the tailnum column renamed to tail_num. The result is
# not assigned, so flights itself keeps its original column name.
flights %>%
  rename(tail_num = tailnum)
NA
# Average arrival delay per tail number, skipping missing arr_delay values.
# Note that the summary column carries the same name as the resulting tibble.
avg_arr_delay <- summarise(
  group_by(flights, tailnum),
  avg_arr_delay = mean(arr_delay, na.rm = TRUE)
)
# Keep the row(s) whose average arrival delay equals the overall minimum.
# Inside filter() the name avg_arr_delay refers to the column, not the tibble.
min_delay_tailnum <- filter(
  avg_arr_delay,
  avg_arr_delay == min(avg_arr_delay, na.rm = TRUE)
)

Question 4

# Show the tail number(s) with the lowest average arrival delay.
print(min_delay_tailnum)

Question 5

# For every calendar day, report the smallest and largest dep_time value
# found among the not_cancelled flights.
not_cancelled %>%
  group_by(year, month, day) %>%
  summarise(first = min(dep_time), last = max(dep_time))
`summarise()` has grouped output by 'year', 'month'. You can override using the `.groups` argument.

Question 6

# Per-month share of flights whose dep_delay exceeds 60, highest share first.
# total_flights counts every row of the month, including rows with a missing
# dep_delay; those rows never count as delayed.
monthly_delay_proportion <- flights %>%
  group_by(month) %>%
  summarise(
    total_flights = n(),
    delayed_flights = sum(!is.na(dep_delay) & dep_delay > 60)
  ) %>%
  mutate(proportion_delayed = delayed_flights / total_flights) %>%
  arrange(desc(proportion_delayed))
print(monthly_delay_proportion)

Question 7

# Number of different carriers serving each destination, most carriers first.
# distinct() reduces the data to one row per (dest, carrier) pair, so counting
# rows per dest yields the number of distinct carriers.
dest_carrier_counts <- flights %>%
  distinct(dest, carrier) %>%
  count(dest, name = "num_carriers") %>%
  arrange(desc(num_carriers))
print(dest_carrier_counts)
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQpRdWVzdGlvbiAxCmBgYHtyfQpsaWJyYXJ5KG55Y2ZsaWdodHMxMykKbGlicmFyeSh0aWR5dmVyc2UpCmBgYAoKYGBge3J9CnBhY21hbjo6cF9sb2FkKG55Y2ZsaWdodHMxMykKYGBgCgpgYGB7cn0Kc3VtbWFyeShmbGlnaHRzKQpgYGAKCmBgYHtyfQpWaWV3KGZsaWdodHMpCmBgYAoKClF1ZXN0aW9uIDIKYGBge3J9CgoKbWF4ZGVwIDwtIG1heChmbGlnaHRzJGRlcF9kZWxheSwgbmEucm09VFJVRSkKCm1heGRlcF9pZCA8LSB3aGljaChmbGlnaHRzJGRlcF9kZWxheT09bWF4ZGVwKQoKZmxpZ2h0c1ttYXhkZXBfaWQsIDEwOjEyXQpgYGAKYGBge3J9Cgpub3RfY2FuY2VsbGVkIDwtIGZsaWdodHMgJT4lIAogIGZpbHRlcighaXMubmEoZGVwX2RlbGF5KSwgIWlzLm5hKGFycl9kZWxheSkpCmBgYAoKYGBge3J9CgoKc2VsZWN0KGZsaWdodHMsIHN0YXJ0c193aXRoKCJkZXAiKSkKYGBgCmBgYHtyfQoKc29ydGYgPC0gYXJyYW5nZShmbGlnaHRzLGRlc2MoZGVwX2RlbGF5KSkgCgpzZWxlY3Qoc29ydGYsIGNhcnJpZXIsIGZsaWdodCwgdGFpbG51bSwgZXZlcnl0aGluZygpKQpgYGAKCgpgYGB7cn0Kbm90X2NhbmNlbGxlZCAlPiUgZ3JvdXBfYnkoeWVhciwgbW9udGgsIGRheSkgJT4lIAogIHN1bW1hcmlzZSgKICAgIGZpcnN0ID0gbWluKGRlcF90aW1lKSwKICAgIGxhc3QgPSBtYXgoZGVwX3RpbWUpCiAgKQpgYGAKUXVlc3Rpb24gMwpgYGB7cn0Kbm90X2NhbmNlbGxlZCA8LSBmbGlnaHRzICU+JSAKCiBmaWx0ZXIoIWlzLm5hKGRlcF9kZWxheSkpCgpub3RfY2FuY2VsbGVkICU+JSAKCiBncm91cF9ieSh5ZWFyLCBtb250aCwgZGF5KSAlPiUgCgogc3VtbWFyaXNlKG1lYW4gPSBtZWFuKGRlcF9kZWxheSkpCmBgYAoKYGBge3J9Cgpub3RfY2FuY2VsbGVkIDwtIGZsaWdodHMgJT4lIAoKIGZpbHRlcighaXMubmEoZGVwX2RlbGF5KSwgIWlzLm5hKGFycl9kZWxheSkpCmBgYAoKYGBge3J9CgpkZWxheXMgPC0gbm90X2NhbmNlbGxlZCAlPiUgCgogZ3JvdXBfYnkodGFpbG51bSkgJT4lIAoKIHN1bW1hcmlzZSgKCiAgZGVsYXkgPSBtZWFuKGFycl9kZWxheSkKCiApCmBgYAoKYGBge3J9CgpmbGlnaHRzICU+JSAKCiBncm91cF9ieSh5ZWFyLCBtb250aCwgZGF5KSAlPiUgCgogc3VtbWFyaXNlKG1lYW4gPSBtZWFuKGRlcF9kZWxheSwgbmEucm0gPSBUUlVFKSkKYGBgCgpgYGB7cn0KcmVuYW1lKGZsaWdodHMsIHRhaWxfbnVtID0gdGFpbG51bSkKYGBgCmBgYHtyfQphdmdfYXJyX2RlbGF5IDwtIGZsaWdodHMgJT4lCiAgZ3JvdXBfYnkodGFpbG51bSkgJT4lCiAgc3VtbWFyaXNlKGF2Z19hcnJfZGVsYXkgPSBtZWFuKGFycl9kZWxheSwgbmEucm0gPSBUUlVFKSkgCmBgYAoKYGBge3J9Cm1pbl9kZWxheV90YWlsbnVtIDwtIGF2Z19hcnJfZGVsYXkgJT4lCiAgZmlsdGVyKGF2Z19hcnJfZGVsYXkgPT0gbWluKGF2Z19hcnJfZGVsYXksIG5hLnJtID0gVFJVRSkpCmBgYApR
dWVzdGlvbiA0CmBgYHtyfQpwcmludChtaW5fZGVsYXlfdGFpbG51bSkKYGBgClF1ZXN0aW9uIDUKYGBge3J9Cm5vdF9jYW5jZWxsZWQgJT4lIAoKIGdyb3VwX2J5KHllYXIsIG1vbnRoLCBkYXkpICU+JSAKCiBzdW1tYXJpc2UoCgogIGZpcnN0ID0gbWluKGRlcF90aW1lKSwKCiAgbGFzdCA9IG1heChkZXBfdGltZSkKCiApCmBgYAoKClF1ZXN0aW9uIDYKYGBge3J9Cm1vbnRobHlfZGVsYXlfcHJvcG9ydGlvbiA8LSBmbGlnaHRzICU+JQogIGdyb3VwX2J5KG1vbnRoKSAlPiUKICBzdW1tYXJpc2UoCiAgICB0b3RhbF9mbGlnaHRzID0gbigpLAogICAgZGVsYXllZF9mbGlnaHRzID0gc3VtKGRlcF9kZWxheSA+IDYwLCBuYS5ybSA9IFRSVUUpLAogICAgcHJvcG9ydGlvbl9kZWxheWVkID0gZGVsYXllZF9mbGlnaHRzIC8gdG90YWxfZmxpZ2h0cwogICkgJT4lCiAgYXJyYW5nZShkZXNjKHByb3BvcnRpb25fZGVsYXllZCkpCnByaW50KG1vbnRobHlfZGVsYXlfcHJvcG9ydGlvbikKYGBgClF1ZXN0aW9uIDcKYGBge3J9CmRlc3RfY2Fycmllcl9jb3VudHMgPC0gZmxpZ2h0cyAlPiUKICBncm91cF9ieShkZXN0KSAlPiUKICBzdW1tYXJpc2UobnVtX2NhcnJpZXJzID0gbl9kaXN0aW5jdChjYXJyaWVyKSkgJT4lCiAgYXJyYW5nZShkZXNjKG51bV9jYXJyaWVycykpCnByaW50KGRlc3RfY2Fycmllcl9jb3VudHMpCmBgYAoK