# Flights ordered from the largest departure delay to the smallest.
sortf <- flights %>%
  arrange(desc(dep_delay))

# Keep only rows where both the departure delay and the arrival delay are
# recorded (the name suggests rows missing either are treated as cancelled).
not_cancelled <- filter(flights, !(is.na(dep_delay) | is.na(arr_delay)))

# One row per tail number: number of flights, mean distance and mean arrival
# delay. Built in stages on the same name so no extra globals are created.
avg_arr_delay <- group_by(not_cancelled, tailnum)
avg_arr_delay <- summarise(
  avg_arr_delay,
  count = n(),
  dist = mean(distance, na.rm = TRUE),
  delay = mean(arr_delay, na.rm = TRUE)
)

# Drop tail numbers with 20 or fewer flights.
avg_arr_delay <- filter(avg_arr_delay, count > 20)
# Scatter plot of mean arrival delay against mean distance (one point per row
# of avg_arr_delay) with a smoothed trend line drawn without a confidence band.
avg_arr_delay %>%
  ggplot(mapping = aes(dist, delay)) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Average Arrival Delay vs Distance",
    x = "Average Distance (miles)",
    y = "Average Arrival Delay (minutes)"
  )
# Message recorded when the plot was rendered:
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Export the per-tail-number summary. The file is named after the grouping it
# actually contains; the previous name, "delay_by_dest.xlsx", described a
# per-destination table and is the same path the later per-destination export
# writes to, so this output was being overwritten.
write_xlsx(avg_arr_delay, "delay_by_tailnum.xlsx")
# Sort flights from the largest departure delay to the smallest.
sortf <- arrange(flights, desc(dep_delay))

# Display the sorted table with carrier, flight and tailnum moved to the front;
# everything() keeps all remaining columns in their existing order.
select(sortf, carrier, flight, tailnum, everything())
# Keep only rows where both the departure delay and the arrival delay are
# recorded.
not_cancelled <- filter(flights, !(is.na(dep_delay) | is.na(arr_delay)))

# Mean arrival delay per tail number, ordered from the lowest mean upwards.
avg_arr_delay <- not_cancelled %>%
  group_by(tailnum) %>%
  summarise(mean_arr_delay = mean(arr_delay, na.rm = TRUE)) %>%
  arrange(mean_arr_delay)

# Show the first row, i.e. the tail number with the lowest mean arrival delay.
head(avg_arr_delay, n = 1)
# Smallest and largest dep_time for each (year, month, day), shown with the
# largest `last` values first.
not_cancelled %>%
  group_by(year, month, day) %>%
  summarise(first = min(dep_time), last = max(dep_time)) %>%
  arrange(desc(last))

# Per month, the proportion of flights with dep_delay > 60 (the column name
# prop_over_hour implies the delay is in minutes). Missing delays are ignored.
prop_by_month <- flights %>%
  group_by(month) %>%
  summarise(prop_over_hour = mean(dep_delay > 60, na.rm = TRUE))
prop_by_month

# Number of distinct carriers serving each destination, most carriers first.
dest_carriers <- flights %>%
  group_by(dest) %>%
  summarise(num_carriers = n_distinct(carrier)) %>%
  arrange(desc(num_carriers))
# Display the carriers-per-destination table.
dest_carriers

# Per destination: number of flights, mean distance and mean arrival delay.
# Destinations with 20 or fewer flights, and "HNL", are excluded.
by_dest <- group_by(flights, dest)
delay_by_dest <- summarise(
  by_dest,
  count = n(),
  dist = mean(distance, na.rm = TRUE),
  delay = mean(arr_delay, na.rm = TRUE)
) %>%
  filter(count > 20, dest != "HNL")

# Mean arrival delay against mean distance, one point per destination sized by
# its flight count, with a smoothed trend line and no confidence band.
ggplot(delay_by_dest, aes(x = dist, y = delay)) +
  geom_point(aes(size = count), alpha = 1 / 3) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Average Arrival Delay vs Distance",
    x = "Average Distance (miles)",
    y = "Average Arrival Delay (minutes)"
  )

# Export the per-destination summary to an Excel workbook.
write_xlsx(delay_by_dest, "delay_by_dest.xlsx")