Question 1

library(dplyr)

pacman::p_load(nycflights13)

View(flights) # this View() function opens lets you directly view the whole dataset

glimpse(flights) # this glimpse() function provides a quick overview of the dataset
Rows: 336,776
Columns: 19
$ year           <int> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2…
$ month          <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ day            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ dep_time       <int> 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, 558, 558, 558, 559, 559,…
$ sched_dep_time <int> 515, 529, 540, 545, 600, 558, 600, 600, 600, 600, 600, 600, 600, 600, 600, 559,…
$ dep_delay      <dbl> 2, 4, 2, -1, -6, -4, -5, -3, -3, -2, -2, -2, -2, -2, -1, 0, -1, 0, 0, 1, -8, -3…
$ arr_time       <int> 830, 850, 923, 1004, 812, 740, 913, 709, 838, 753, 849, 853, 924, 923, 941, 702…
$ sched_arr_time <int> 819, 830, 850, 1022, 837, 728, 854, 723, 846, 745, 851, 856, 917, 937, 910, 706…
$ arr_delay      <dbl> 11, 20, 33, -18, -25, 12, 19, -14, -8, 8, -2, -3, 7, -14, 31, -4, -8, -7, 12, -…
$ carrier        <chr> "UA", "UA", "AA", "B6", "DL", "UA", "B6", "EV", "B6", "AA", "B6", "B6", "UA", "…
$ flight         <int> 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 49, 71, 194, 1124, 707, 1…
$ tailnum        <chr> "N14228", "N24211", "N619AA", "N804JB", "N668DN", "N39463", "N516JB", "N829AS",…
$ origin         <chr> "EWR", "LGA", "JFK", "JFK", "LGA", "EWR", "EWR", "LGA", "JFK", "LGA", "JFK", "J…
$ dest           <chr> "IAH", "IAH", "MIA", "BQN", "ATL", "ORD", "FLL", "IAD", "MCO", "ORD", "PBI", "T…
$ air_time       <dbl> 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 158, 345, 361, 257, 44, 3…
$ distance       <dbl> 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, 1028, 1005, 2475, 2565, …
$ hour           <dbl> 5, 5, 5, 5, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6…
$ minute         <dbl> 15, 29, 40, 45, 0, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, 0, 0, 10, 5, 10, 10…
$ time_hour      <dttm> 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:0…
summary(flights)
      year          month             day           dep_time    sched_dep_time   dep_delay      
 Min.   :2013   Min.   : 1.000   Min.   : 1.00   Min.   :   1   Min.   : 106   Min.   : -43.00  
 1st Qu.:2013   1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.: 907   1st Qu.: 906   1st Qu.:  -5.00  
 Median :2013   Median : 7.000   Median :16.00   Median :1401   Median :1359   Median :  -2.00  
 Mean   :2013   Mean   : 6.549   Mean   :15.71   Mean   :1349   Mean   :1344   Mean   :  12.64  
 3rd Qu.:2013   3rd Qu.:10.000   3rd Qu.:23.00   3rd Qu.:1744   3rd Qu.:1729   3rd Qu.:  11.00  
 Max.   :2013   Max.   :12.000   Max.   :31.00   Max.   :2400   Max.   :2359   Max.   :1301.00  
                                                 NA's   :8255                  NA's   :8255     
    arr_time    sched_arr_time   arr_delay          carrier              flight       tailnum         
 Min.   :   1   Min.   :   1   Min.   : -86.000   Length:336776      Min.   :   1   Length:336776     
 1st Qu.:1104   1st Qu.:1124   1st Qu.: -17.000   Class :character   1st Qu.: 553   Class :character  
 Median :1535   Median :1556   Median :  -5.000   Mode  :character   Median :1496   Mode  :character  
 Mean   :1502   Mean   :1536   Mean   :   6.895                      Mean   :1972                     
 3rd Qu.:1940   3rd Qu.:1945   3rd Qu.:  14.000                      3rd Qu.:3465                     
 Max.   :2400   Max.   :2359   Max.   :1272.000                      Max.   :8500                     
 NA's   :8713                  NA's   :9430                                                           
    origin              dest              air_time        distance         hour           minute     
 Length:336776      Length:336776      Min.   : 20.0   Min.   :  17   Min.   : 1.00   Min.   : 0.00  
 Class :character   Class :character   1st Qu.: 82.0   1st Qu.: 502   1st Qu.: 9.00   1st Qu.: 8.00  
 Mode  :character   Mode  :character   Median :129.0   Median : 872   Median :13.00   Median :29.00  
                                       Mean   :150.7   Mean   :1040   Mean   :13.18   Mean   :26.23  
                                       3rd Qu.:192.0   3rd Qu.:1389   3rd Qu.:17.00   3rd Qu.:44.00  
                                       Max.   :695.0   Max.   :4983   Max.   :23.00   Max.   :59.00  
                                       NA's   :9430                                                  
   time_hour                     
 Min.   :2013-01-01 05:00:00.00  
 1st Qu.:2013-04-04 13:00:00.00  
 Median :2013-07-03 10:00:00.00  
 Mean   :2013-07-03 05:22:54.64  
 3rd Qu.:2013-10-01 07:00:00.00  
 Max.   :2013-12-31 23:00:00.00  
                                 

Question 2

sortf <- arrange(flights,desc(dep_delay)) 

select(sortf, carrier, flight, tailnum, everything())
maxdep <- max(flights$dep_delay, na.rm=TRUE)

maxdep_id <- which(flights$dep_delay==maxdep)

flights[maxdep_id, 10:12]
select(flights, starts_with("dep"))
summarise(flights, delay=mean(dep_delay,na.rm=TRUE))

Question 3

not_cancelled <- flights %>% 

 filter(!is.na(dep_delay))


not_cancelled %>% 

 group_by(year, month, day) %>% 

 summarise(mean = mean(dep_delay))
`summarise()` has grouped output by 'year', 'month'. You can override using the `.groups` argument.
flights %>% 

 group_by(year, month, day) %>% 

 summarise(mean = mean(dep_delay, na.rm = TRUE))
`summarise()` has grouped output by 'year', 'month'. You can override using the `.groups` argument.
delays <- not_cancelled %>% 

 group_by(tailnum) %>% 

 summarise(

  delay = mean(arr_delay)

 )
not_cancelled <- flights %>% 

 filter(!is.na(dep_delay), !is.na(arr_delay))

Question 4

# Step 1: Compute the average arrival delay per tailnum
avg_arr_delay <- flights %>%
  filter(!is.na(arr_delay)) %>%
  group_by(tailnum) %>%
  summarise(avg_arr_delay = mean(arr_delay, na.rm = TRUE))

# Step 2: Get the tailnum with the lowest average arrival delay
min_tailnum <- avg_arr_delay %>%
  filter(avg_arr_delay == min(avg_arr_delay)) %>%
  select(tailnum)

# Print the result
print(min_tailnum)

Question 5

not_cancelled %>% 

 group_by(year, month, day) %>% 

 summarise(

  first = min(dep_time),

  last = max(dep_time)

 )
`summarise()` has grouped output by 'year', 'month'. You can override using the `.groups` argument.

Question 6

# Compute the proportion of flights delayed by more than an hour for each month
delay_proportion <- flights %>%
  filter(!is.na(dep_delay)) %>%  # Remove NA values (cancelled flights)
  group_by(month) %>%
  summarise(
    total_flights = n(),  
    delayed_flights = sum(dep_delay > 60),  
    proportion = delayed_flights / total_flights  
  )

print(delay_proportion)

Question 7

not_cancelled %>% 
  group_by(dest) %>% 
  summarise(carriers = n_distinct(carrier)) %>% 
  arrange(desc(carriers))

Question 8 not needed in Code

Question 9

delays <- flights %>% 

 group_by(dest) %>% 

 summarise(

  count = n(),

  dist = mean(distance, na.rm = TRUE),

  delay = mean(arr_delay, na.rm = TRUE)

 ) %>% 

 filter(count > 20, dest != "HNL")
LS0tDQp0aXRsZTogIkFzc2lnbm1lbnQgNiINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQpRdWVzdGlvbiAxDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpICMgbWFrZSBzdXJlIHRvIHJ1biBkcGx5ciBvdGhlcndpc2UgZ2xpbXBzZSgpIGRvZXMgbm90IHdvcmsNCg0KcGFjbWFuOjpwX2xvYWQobnljZmxpZ2h0czEzKQ0KDQpWaWV3KGZsaWdodHMpICMgdGhpcyBWaWV3KCkgZnVuY3Rpb24gb3BlbnMgbGV0cyB5b3UgZGlyZWN0bHkgdmlldyB0aGUgd2hvbGUgZGF0YXNldA0KDQpnbGltcHNlKGZsaWdodHMpICMgdGhpcyBnbGltcHNlKCkgZnVuY3Rpb24gcHJvdmlkZXMgYSBxdWljayBvdmVydmlldyBvZiB0aGUgZGF0YXNldA0KDQpzdW1tYXJ5KGZsaWdodHMpDQpgYGANClF1ZXN0aW9uIDINCmBgYHtyfSANCnNvcnRmIDwtIGFycmFuZ2UoZmxpZ2h0cyxkZXNjKGRlcF9kZWxheSkpIA0KDQpzZWxlY3Qoc29ydGYsIGNhcnJpZXIsIGZsaWdodCwgdGFpbG51bSwgZXZlcnl0aGluZygpKQ0KYGBgDQpgYGB7cn0NCm1heGRlcCA8LSBtYXgoZmxpZ2h0cyRkZXBfZGVsYXksIG5hLnJtPVRSVUUpDQoNCm1heGRlcF9pZCA8LSB3aGljaChmbGlnaHRzJGRlcF9kZWxheT09bWF4ZGVwKQ0KDQpmbGlnaHRzW21heGRlcF9pZCwgMTA6MTJdDQpgYGANCmBgYHtyfQ0Kc2VsZWN0KGZsaWdodHMsIHN0YXJ0c193aXRoKCJkZXAiKSkNCmBgYA0KYGBge3J9DQpzdW1tYXJpc2UoZmxpZ2h0cywgZGVsYXk9bWVhbihkZXBfZGVsYXksbmEucm09VFJVRSkpDQpgYGANClF1ZXN0aW9uIDMNCmBgYHtyfQ0Kbm90X2NhbmNlbGxlZCA8LSBmbGlnaHRzICU+JSANCg0KIGZpbHRlcighaXMubmEoZGVwX2RlbGF5KSkNCg0KDQpub3RfY2FuY2VsbGVkICU+JSANCg0KIGdyb3VwX2J5KHllYXIsIG1vbnRoLCBkYXkpICU+JSANCg0KIHN1bW1hcmlzZShtZWFuID0gbWVhbihkZXBfZGVsYXkpKQ0KYGBgDQpgYGB7cn0NCmZsaWdodHMgJT4lIA0KDQogZ3JvdXBfYnkoeWVhciwgbW9udGgsIGRheSkgJT4lIA0KDQogc3VtbWFyaXNlKG1lYW4gPSBtZWFuKGRlcF9kZWxheSwgbmEucm0gPSBUUlVFKSkNCmBgYA0KYGBge3J9DQpkZWxheXMgPC0gbm90X2NhbmNlbGxlZCAlPiUgDQoNCiBncm91cF9ieSh0YWlsbnVtKSAlPiUgDQoNCiBzdW1tYXJpc2UoDQoNCiAgZGVsYXkgPSBtZWFuKGFycl9kZWxheSkNCg0KICkNCmBgYA0KDQpgYGB7cn0NCm5vdF9jYW5jZWxsZWQgPC0gZmxpZ2h0cyAlPiUgDQoNCiBmaWx0ZXIoIWlzLm5hKGRlcF9kZWxheSksICFpcy5uYShhcnJfZGVsYXkpKQ0KYGBgDQoNClF1ZXN0aW9uIDQNCmBgYHtyfQ0KIyBTdGVwIDE6IENvbXB1dGUgdGhlIGF2ZXJhZ2UgYXJyaXZhbCBkZWxheSBwZXIgdGFpbG51bQ0KYXZnX2Fycl9kZWxheSA8LSBmbGlnaHRzICU+JQ0KICBmaWx0ZXIoIWlzLm5hKGFycl9kZWxheSkpICU+JQ0KICBncm91cF9ieSh0YWlsbnVtKSAlPiUNCiAgc3VtbWFyaXNlKGF2Z19hcnJfZGVsYXkgPSBtZWFuKGFycl9kZWxheSwgbmEucm0gPSBUUlVFKSkNCg0KIyBTdGVwIDI6IEdldCB0aGUgdGFpbG51bSB3aXRoIHRoZSBsb3dlc3QgYXZlcmFnZSBhcnJpdmFsIGRlbGF5DQptaW5fdGFpbG51bSA8LSBhdmdfYXJyX2RlbGF5ICU+JQ0KICBmaWx0ZXIoYXZnX2Fycl9kZWxheSA9PSBtaW4oYXZnX2Fycl9kZWxheSkpICU+JQ0KICBzZWxlY3QodGFpbG51bSkNCg0KIyBQcmludCB0aGUgcmVzdWx0DQpwcmludChtaW5fdGFpbG51bSkNCmBgYA0KUXVlc3Rpb24gNQ0KYGBge3J9DQpub3RfY2FuY2VsbGVkICU+JSANCg0KIGdyb3VwX2J5KHllYXIsIG1vbnRoLCBkYXkpICU+JSANCg0KIHN1bW1hcmlzZSgNCg0KICBmaXJzdCA9IG1pbihkZXBfdGltZSksDQoNCiAgbGFzdCA9IG1heChkZXBfdGltZSkNCg0KICkNCmBgYA0KUXVlc3Rpb24gNg0KYGBge3J9DQojIENvbXB1dGUgdGhlIHByb3BvcnRpb24gb2YgZmxpZ2h0cyBkZWxheWVkIGJ5IG1vcmUgdGhhbiBhbiBob3VyIGZvciBlYWNoIG1vbnRoDQpkZWxheV9wcm9wb3J0aW9uIDwtIGZsaWdodHMgJT4lDQogIGZpbHRlcighaXMubmEoZGVwX2RlbGF5KSkgJT4lICAjIFJlbW92ZSBOQSB2YWx1ZXMgKGNhbmNlbGxlZCBmbGlnaHRzKQ0KICBncm91cF9ieShtb250aCkgJT4lDQogIHN1bW1hcmlzZSgNCiAgICB0b3RhbF9mbGlnaHRzID0gbigpLCAgDQogICAgZGVsYXllZF9mbGlnaHRzID0gc3VtKGRlcF9kZWxheSA+IDYwKSwgIA0KICAgIHByb3BvcnRpb24gPSBkZWxheWVkX2ZsaWdodHMgLyB0b3RhbF9mbGlnaHRzICANCiAgKQ0KDQpwcmludChkZWxheV9wcm9wb3J0aW9uKQ0KYGBgDQpRdWVzdGlvbiA3DQpgYGB7cn0NCm5vdF9jYW5jZWxsZWQgJT4lIA0KICBncm91cF9ieShkZXN0KSAlPiUgDQogIHN1bW1hcmlzZShjYXJyaWVycyA9IG5fZGlzdGluY3QoY2FycmllcikpICU+JSANCiAgYXJyYW5nZShkZXNjKGNhcnJpZXJzKSkNCmBgYA0KUXVlc3Rpb24gOCBub3QgbmVlZGVkIGluIENvZGUNCg0KUXVlc3Rpb24gOQ0KYGBge3J9DQpkZWxheXMgPC0gZmxpZ2h0cyAlPiUgDQoNCiBncm91cF9ieShkZXN0KSAlPiUgDQoNCiBzdW1tYXJpc2UoDQoNCiAgY291bnQgPSBuKCksDQoNCiAgZGlzdCA9IG1lYW4oZGlzdGFuY2UsIG5hLnJtID0gVFJVRSksDQoNCiAgZGVsYXkgPSBtZWFuKGFycl9kZWxheSwgbmEucm0gPSBUUlVFKQ0KDQogKSAlPiUgDQoNCiBmaWx0ZXIoY291bnQgPiAyMCwgZGVzdCAhPSAiSE5MIikNCmBgYA0KDQoNCg==