Question 1

library(dplyr)
pacman::p_load(nycflights13)

View(flights) # View() opens the whole flights dataset so you can browse it directly
# glimpse() prints the row and column counts, then one line per column showing
# its type and its first few values.
glimpse(flights) 
Rows: 336,776
Columns: 19
$ year           <int> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013…
$ month          <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ day            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ dep_time       <int> 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, 558, 558, 558, …
$ sched_dep_time <int> 515, 529, 540, 545, 600, 558, 600, 600, 600, 600, 600, 600, 600, 600, …
$ dep_delay      <dbl> 2, 4, 2, -1, -6, -4, -5, -3, -3, -2, -2, -2, -2, -2, -1, 0, -1, 0, 0, …
$ arr_time       <int> 830, 850, 923, 1004, 812, 740, 913, 709, 838, 753, 849, 853, 924, 923,…
$ sched_arr_time <int> 819, 830, 850, 1022, 837, 728, 854, 723, 846, 745, 851, 856, 917, 937,…
$ arr_delay      <dbl> 11, 20, 33, -18, -25, 12, 19, -14, -8, 8, -2, -3, 7, -14, 31, -4, -8, …
$ carrier        <chr> "UA", "UA", "AA", "B6", "DL", "UA", "B6", "EV", "B6", "AA", "B6", "B6"…
$ flight         <int> 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 49, 71, 194, 112…
$ tailnum        <chr> "N14228", "N24211", "N619AA", "N804JB", "N668DN", "N39463", "N516JB", …
$ origin         <chr> "EWR", "LGA", "JFK", "JFK", "LGA", "EWR", "EWR", "LGA", "JFK", "LGA", …
$ dest           <chr> "IAH", "IAH", "MIA", "BQN", "ATL", "ORD", "FLL", "IAD", "MCO", "ORD", …
$ air_time       <dbl> 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 158, 345, 361, 2…
$ distance       <dbl> 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, 1028, 1005, 247…
$ hour           <dbl> 5, 5, 5, 5, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6…
$ minute         <dbl> 15, 29, 40, 45, 0, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, 0, 0, 10, …
$ time_hour      <dttm> 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-0…

Question 2


# Order every flight from the longest departure delay to the shortest.
# arrange() keeps rows with a missing dep_delay at the end, even with desc().
sortf <- flights %>%
  arrange(desc(dep_delay))

# Show the sorted flights with the identifying columns (carrier, flight number,
# tail number) moved to the front; everything() keeps the remaining columns.
sortf %>%
  select(carrier, flight, tailnum, everything())

Question 3

# Keep only the flights that have a recorded departure delay.
not_cancelled <- filter(flights, !is.na(dep_delay))

# Mean departure delay for each calendar day. No na.rm is needed here because
# rows with a missing dep_delay were already removed above.
summarise(
  group_by(not_cancelled, year, month, day),
  mean = mean(dep_delay)
)
`summarise()` has grouped output by 'year', 'month'. You can override using the `.groups`
argument.

# Same daily mean computed straight from flights: instead of filtering first,
# missing dep_delay values are skipped inside mean() via na.rm = TRUE.
summarise(
  group_by(flights, year, month, day),
  mean = mean(dep_delay, na.rm = TRUE)
)
`summarise()` has grouped output by 'year', 'month'. You can override using the `.groups`
argument.

Question 4

library(dplyr)
# Average arrival delay per tail number, using only flights that have a
# recorded arr_delay.
avg_arr_delay <- summarise(
  group_by(filter(flights, !is.na(arr_delay)), tailnum),
  avg_delay = mean(arr_delay, na.rm = TRUE)
)

# Keep the tail number(s) whose average equals the overall minimum; ties, if
# any, are all retained.
lowest_avg_tailnum <- filter(avg_arr_delay, avg_delay == min(avg_delay))
lowest_avg_tailnum

Question 5

# Smallest and largest dep_time value for each calendar day, computed over the
# flights that have a recorded departure delay.
summarise(
  group_by(not_cancelled, year, month, day),
  first = min(dep_time),
  last = max(dep_time)
)
`summarise()` has grouped output by 'year', 'month'. You can override using the `.groups`
argument.

Question 6

# Share of each month's flights that left more than 60 minutes late, listed
# from the worst month to the best.
# Note: total_flights counts every row in the month, including rows whose
# dep_delay is NA, while delayed_flights ignores those rows (na.rm = TRUE).
monthly_delay_proportion <- arrange(
  summarise(
    group_by(flights, month),
    total_flights = n(),
    delayed_flights = sum(dep_delay > 60, na.rm = TRUE),
    proportion_delayed = delayed_flights / total_flights
  ),
  desc(proportion_delayed)
)
print(monthly_delay_proportion)

Question 7

# Number of distinct carriers serving each destination, with the destinations
# served by the most carriers listed first.
dest_carrier_counts <- arrange(
  summarise(
    group_by(flights, dest),
    num_carriers = n_distinct(carrier)
  ),
  desc(num_carriers)
)
print(dest_carrier_counts)
LS0tCnRpdGxlOiAiQXNzaWdubWVudCA2IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tClF1ZXN0aW9uIDEKYGBge3J9CmxpYnJhcnkoZHBseXIpCnBhY21hbjo6cF9sb2FkKG55Y2ZsaWdodHMxMykKClZpZXcoZmxpZ2h0cykgCmdsaW1wc2UoZmxpZ2h0cykgCgpgYGAKCgpRdWVzdGlvbiAyIApgYGB7cn0KbGlicmFyeShkcGx5cikKCm1heGRlcCA8LSBtYXgoZmxpZ2h0cyRkZXBfZGVsYXksIG5hLnJtPVRSVUUpCmZpbHRlcihmbGlnaHRzLCBkZXBfZGVsYXkgPT0gbWF4ZGVwKSAlPiUgCiAgc2VsZWN0KGNhcnJpZXIsIGZsaWdodCwgdGFpbG51bSwgZXZlcnl0aGluZygpKQpgYGAKCmBgYHtyfQoKc29ydGYgPC0gYXJyYW5nZShmbGlnaHRzLGRlc2MoZGVwX2RlbGF5KSkgCgpzZWxlY3Qoc29ydGYsIGNhcnJpZXIsIGZsaWdodCwgdGFpbG51bSwgZXZlcnl0aGluZygpKQpgYGAKClF1ZXN0aW9uIDMKYGBge3J9Cm5vdF9jYW5jZWxsZWQgPC0gZmxpZ2h0cyAlPiUgCiAgZmlsdGVyKCFpcy5uYShkZXBfZGVsYXkpKQoKbm90X2NhbmNlbGxlZCAlPiUgCiAgZ3JvdXBfYnkoeWVhciwgbW9udGgsIGRheSkgJT4lIAogIHN1bW1hcmlzZShtZWFuID0gbWVhbihkZXBfZGVsYXkpKQpgYGAKYGBge3J9CgpmbGlnaHRzICU+JSAKCiBncm91cF9ieSh5ZWFyLCBtb250aCwgZGF5KSAlPiUgCgogc3VtbWFyaXNlKG1lYW4gPSBtZWFuKGRlcF9kZWxheSwgbmEucm0gPSBUUlVFKSkKYGBgClF1ZXN0aW9uIDQKYGBge3J9CmxpYnJhcnkoZHBseXIpCmF2Z19hcnJfZGVsYXkgPC0gZmxpZ2h0cyAlPiUKICBmaWx0ZXIoIWlzLm5hKGFycl9kZWxheSkpICU+JSAKICBncm91cF9ieSh0YWlsbnVtKSAlPiUgCiAgc3VtbWFyaXNlKGF2Z19kZWxheSA9IG1lYW4oYXJyX2RlbGF5LCBuYS5ybSA9IFRSVUUpKSAKYGBgCgpgYGB7cn0KbG93ZXN0X2F2Z190YWlsbnVtIDwtIGF2Z19hcnJfZGVsYXkgJT4lCiAgZmlsdGVyKGF2Z19kZWxheSA9PSBtaW4oYXZnX2RlbGF5KSkKYGBgCgpgYGB7cn0KbG93ZXN0X2F2Z190YWlsbnVtCmBgYApRdWVzdGlvbiA1CmBgYHtyfQpub3RfY2FuY2VsbGVkICU+JSAKCiBncm91cF9ieSh5ZWFyLCBtb250aCwgZGF5KSAlPiUgCgogc3VtbWFyaXNlKAoKICBmaXJzdCA9IG1pbihkZXBfdGltZSksCgogIGxhc3QgPSBtYXgoZGVwX3RpbWUpCgogKQpgYGAKUXVlc3Rpb24gNgpgYGB7cn0KbW9udGhseV9kZWxheV9wcm9wb3J0aW9uIDwtIGZsaWdodHMgJT4lCiAgZ3JvdXBfYnkobW9udGgpICU+JQogIHN1bW1hcmlzZSgKICAgIHRvdGFsX2ZsaWdodHMgPSBuKCksCiAgICBkZWxheWVkX2ZsaWdodHMgPSBzdW0oZGVwX2RlbGF5ID4gNjAsIG5hLnJtID0gVFJVRSksCiAgICBwcm9wb3J0aW9uX2RlbGF5ZWQgPSBkZWxheWVkX2ZsaWdodHMgLyB0b3RhbF9mbGlnaHRzCiAgKSAlPiUKICBhcnJhbmdlKGRlc2MocHJvcG9ydGlvbl9kZWxheWVkKSkKcHJpbnQobW9udGhseV9kZWxheV9wcm9wb3J0aW9uKQpgYGAKUXVlc3Rpb24gNwpgYGB7cn0KZGVzdF9j
YXJyaWVyX2NvdW50cyA8LSBmbGlnaHRzICU+JQogIGdyb3VwX2J5KGRlc3QpICU+JQogIHN1bW1hcmlzZShudW1fY2FycmllcnMgPSBuX2Rpc3RpbmN0KGNhcnJpZXIpKSAlPiUKICBhcnJhbmdlKGRlc2MobnVtX2NhcnJpZXJzKSkKcHJpbnQoZGVzdF9jYXJyaWVyX2NvdW50cykKYGBgCgoKCgo=