## Load data
#install.packages("nycflights23")
library (tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.2 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.4
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (nycflights23)
# Make the `flights` and `airlines` datasets available in the workspace.
data("flights", "airlines")

# Keep only the flights that have a recorded distance, arrival delay and
# departure delay; rows missing any of the three are discarded.
flights_nona <- drop_na(flights, distance, arr_delay, dep_delay)
# One row per destination: number of flights plus the mean distance, mean
# arrival delay and mean departure delay. Only destinations whose mean
# distance is below 3000 are kept, ordered from the lowest mean arrival
# delay to the highest.
by_dest <- flights_nona |>
  group_by(dest) |>
  summarise(
    count = n(),
    avg_dist = mean(distance),
    avg_arr_delay = mean(arr_delay),
    avg_dep_delay = mean(dep_delay),
    .groups = "drop" # hand back an ungrouped tibble
  ) |>
  filter(avg_dist < 3000) |>
  arrange(avg_arr_delay)

# Preview the first rows of the summary.
by_dest |>
  head()
# A tibble: 6 × 5
dest count avg_dist avg_arr_delay avg_dep_delay
<chr> <int> <dbl> <dbl> <dbl>
1 PNS 71 1030 -10.6 -1.24
2 HHH 461 695. -9.95 1.38
3 HDN 27 1728 -9.93 8.78
4 VPS 107 988 -9.41 2.62
5 AVP 140 93 -8.53 -0.957
6 GSO 2857 456. -7.77 3.81
## Top 10 destinations by mean arrival delay (snippet originally from AI)
# Sort from the highest mean arrival delay downward and keep the first
# ten rows.
top10 <- by_dest |>
  arrange(desc(avg_arr_delay)) |>
  head(n = 10)

# Preview the first rows of the result.
top10 |>
  head()
# A tibble: 6 × 5
dest count avg_dist avg_arr_delay avg_dep_delay
<chr> <int> <dbl> <dbl> <dbl>
1 PSE 319 1617 37.6 44.2
2 RNO 129 2410. 34.4 47.0
3 ABQ 218 1825. 26.7 41.2
4 ONT 353 2429 26.1 37.3
5 BQN 957 1579. 25.6 32.2
6 SJU 5312 1602. 21.0 28.9
# Horizontal bar chart of the destinations in `top10`: bar length is the
# mean arrival delay, with one bar and one fill colour per destination.
ggplot(top10, aes(x = avg_arr_delay, y = dest, fill = dest)) +
  # geom_col() draws bars whose length is the value itself; it is the
  # idiomatic form of geom_bar(stat = "identity").
  geom_col() +
  # No `labels =` override here. Labels are matched to the scale breaks by
  # position, and the breaks of a character variable are in alphabetical
  # order, whereas top10$dest is ordered by delay. Passing top10$dest would
  # therefore attach the wrong destination code to each legend colour. The
  # default labels are the destination codes themselves.
  scale_fill_discrete(name = "Destinations") +
  labs(
    title = "Top 10 Most Delayed Destinations from NYC",
    x = "Average Arrival Delay",
    y = "destination",
    caption = "FAA Aircraft registry"
  )