# Order all flights from the largest departure delay to the smallest.
sortf <- flights %>%
  arrange(desc(dep_delay))

# Keep only flights that have both a departure delay and an arrival delay
# recorded; rows missing either value are dropped (the variable name treats
# those rows as cancelled flights).
not_cancelled <- filter(
  flights,
  !(is.na(dep_delay) | is.na(arr_delay))
)

# Per-aircraft summary, one row per tail number:
#   count - number of flights in `not_cancelled`
#   dist  - mean flight distance
#   delay - mean arrival delay
# Only tail numbers with more than 20 flights are kept.
avg_arr_delay <- group_by(not_cancelled, tailnum) %>%
  summarise(count = n(),
            dist = mean(distance, na.rm = TRUE),
            delay = mean(arr_delay, na.rm = TRUE)) %>%
  filter(count > 20)

# Scatter plot of mean arrival delay against mean distance (one point per row
# of `avg_arr_delay`), with a smoothed trend line and no confidence band.
ggplot(data = avg_arr_delay, mapping = aes(x = dist, y = delay)) +
  geom_point(alpha = 0.5) +  # semi-transparent points
  geom_smooth(se = FALSE) +  # se = FALSE hides the confidence ribbon
  labs(
    x = "Average Distance (miles)",
    y = "Average Arrival Delay (minutes)",
    title = "Average Arrival Delay vs Distance"
  )
# Console message recorded from a run of the plot above:
#   `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Export the summary table to an Excel workbook in the working directory.
# NOTE(review): the file name says "by_dest", but `avg_arr_delay` above is
# grouped by `tailnum` -- confirm the intended file name before relying on it.
write_xlsx(x = avg_arr_delay, path = "delay_by_dest.xlsx")

# Order all flights from the largest departure delay to the smallest.
sortf <- arrange(flights, desc(dep_delay))

# Show the sorted flights with the identifying columns (carrier, flight,
# tailnum) moved to the front; everything() keeps the remaining columns in
# their existing order. The result is printed, not stored.
select(sortf, carrier, flight, tailnum, everything())

# Drop every flight that is missing its departure delay or its arrival delay.
not_cancelled <- filter(
  flights,
  !is.na(dep_delay),
  !is.na(arr_delay)
)

# Mean arrival delay per tail number, sorted ascending so the aircraft with
# the smallest mean arrival delay comes first.
avg_arr_delay <- not_cancelled %>%
  group_by(tailnum) %>%
  summarise(mean_arr_delay = mean(arr_delay, na.rm = TRUE)) %>%
  arrange(mean_arr_delay)

# Print only the first row of the sorted table.
head(avg_arr_delay, n = 1)

# For each calendar day, find the earliest and latest departure time among
# the flights in `not_cancelled`, and print the days ordered by latest
# departure (largest first). The result is printed, not stored.
not_cancelled %>%
  group_by(year, month, day) %>%
  summarise(first = min(dep_time), last = max(dep_time)) %>%
  arrange(desc(last))

# Per month: proportion of flights whose departure delay exceeded 60 minutes.
# mean() of a logical vector gives the share of TRUE values; na.rm = TRUE
# excludes flights with no recorded departure delay from that share.
prop_by_month <- flights %>%
  group_by(month) %>%
  summarise(prop_over_hour = mean(dep_delay > 60, na.rm = TRUE))

# Print the monthly proportions.
prop_by_month

# Number of distinct carriers serving each destination, with the destinations
# served by the most carriers listed first.
dest_carriers <- flights %>%
  group_by(dest) %>%
  summarise(num_carriers = n_distinct(carrier)) %>%
  arrange(desc(num_carriers))

# Print the carriers-per-destination table.
dest_carriers

# Group all flights by destination.
by_dest <- group_by(flights, dest)

# Per-destination summary:
#   count - number of flights
#   dist  - mean flight distance
#   delay - mean arrival delay (flights with a missing arrival delay ignored)
# Destinations with 20 or fewer flights, and "HNL", are excluded.
delay_by_dest <- summarise(
  by_dest,
  count = n(),
  dist = mean(distance, na.rm = TRUE),
  delay = mean(arr_delay, na.rm = TRUE)
) %>%
  filter(count > 20, dest != "HNL")

# Scatter plot of mean arrival delay against mean distance, one point per
# destination. Point size reflects the number of flights; a smoothed trend
# line is drawn without its confidence band.
ggplot(delay_by_dest, aes(x = dist, y = delay)) +
  geom_point(aes(size = count), alpha = 1 / 3) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Average Arrival Delay vs Distance",
    x = "Average Distance (miles)",
    y = "Average Arrival Delay (minutes)"
  )

# Export the per-destination summary to an Excel workbook in the working
# directory.
write_xlsx(delay_by_dest, "delay_by_dest.xlsx")