Setup

library(tidyverse)
library(lubridate)
library(knitr)
library(skimr)
library(scales)

Importing the data set

library(readr)
# Load the Michigan flights CSV (path is relative to the working directory)
# and print a skimr summary of every column.
miFlights <- read_csv(file = "miFlights2019-2021.csv")
skim(miFlights)
Data summary
Name miFlights
Number of rows 463818
Number of columns 37
_______________________
Column type frequency:
character 9
numeric 27
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
carrier 0 1.00 2 2 0 16 0
tailnum 2189 1.00 3 6 0 5250 0
origin 0 1.00 3 3 0 4 0
dest 0 1.00 3 3 0 130 0
carrier_name 0 1.00 9 34 0 16 0
plane_type 11140 0.98 23 23 0 1 0
plane_manufacturer 11140 0.98 6 29 0 16 0
plane_model 11140 0.98 5 15 0 93 0
plane_engine 11140 0.98 9 9 0 2 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2019.92 0.85 2019.00 2019.00 2020.00 2021.00 2021.00 ▇▁▆▁▆
month 0 1.00 6.53 3.48 1.00 3.00 7.00 10.00 12.00 ▇▅▅▅▇
day 0 1.00 15.74 8.76 1.00 8.00 16.00 23.00 31.00 ▇▇▇▇▆
dep_time 9060 0.98 1372.26 490.52 1.00 950.00 1355.00 1754.00 2400.00 ▁▇▇▇▆
sched_dep_time 0 1.00 1368.40 481.75 49.00 948.00 1355.00 1750.00 2336.00 ▁▇▇▇▆
dep_delay 9063 0.98 7.11 44.97 -54.00 -5.00 -3.00 0.00 2672.00 ▇▁▁▁▁
arr_time 9324 0.98 1481.01 506.74 1.00 1053.00 1502.00 1905.00 2400.00 ▁▅▇▇▆
sched_arr_time 0 1.00 1496.79 495.16 1.00 1103.00 1510.00 1910.00 2359.00 ▁▃▇▇▇
arr_delay 10239 0.98 0.16 47.21 -85.00 -17.00 -9.00 2.00 2649.00 ▇▁▁▁▁
flight 0 1.00 413.37 269.57 1.00 189.00 387.00 600.00 1322.00 ▇▇▆▂▁
air_time 10239 0.98 94.59 63.12 15.00 50.00 74.00 130.00 581.00 ▇▂▁▁▁
distance 0 1.00 641.00 488.23 74.00 296.00 500.00 957.00 4475.00 ▇▂▁▁▁
hour 0 1.00 13.41 4.79 0.00 9.00 13.00 17.00 23.00 ▁▇▇▇▆
minute 0 1.00 27.49 17.94 0.00 11.00 27.00 44.00 59.00 ▇▆▇▆▆
temp 441760 0.05 42.16 15.40 -4.00 32.00 37.90 48.90 90.00 ▁▆▇▂▁
dewp 441762 0.05 31.91 13.42 -9.00 23.00 28.90 39.90 75.90 ▁▆▇▃▁
humid 441773 0.05 68.87 15.10 25.87 57.93 71.82 80.66 100.00 ▁▃▅▇▃
wind_dir 9205 0.98 181.02 109.46 0.00 80.00 200.00 270.00 360.00 ▇▃▆▇▆
wind_speed 4367 0.99 8.59 5.64 0.00 4.60 8.06 11.51 42.58 ▇▆▁▁▁
wind_gust 4367 0.99 9.88 6.50 0.00 5.30 9.27 13.24 49.00 ▇▆▁▁▁
precip 430846 0.07 0.01 0.02 0.00 0.00 0.00 0.01 0.44 ▇▁▁▁▁
pressure 447131 0.04 1018.83 7.60 990.40 1014.10 1019.00 1023.40 1038.50 ▁▂▇▇▂
visib 1934 1.00 8.18 2.56 0.06 7.00 10.00 10.00 10.00 ▁▁▁▂▇
plane_year 21647 0.95 2008.12 7.15 1987.00 2003.00 2007.00 2015.00 2021.00 ▁▂▇▃▅
plane_engines 11140 0.98 2.00 0.02 2.00 2.00 2.00 2.00 3.00 ▇▁▁▁▁
plane_seats 11140 0.98 127.86 66.68 20.00 80.00 95.00 182.00 451.00 ▇▃▂▁▁
plane_speed 11140 0.98 0.01 1.72 0.00 0.00 0.00 0.00 438.00 ▇▁▁▁▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time_hour 0 1 2019-01-01 05:00:00 2021-12-31 22:00:00 2020-03-26 06:00:00 19059
# Compact preview: one line per column with its type and leading values.
miFlights %>% glimpse()
## Rows: 463,818
## Columns: 37
## $ year               <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 201…
## $ month              <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ day                <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ dep_time           <dbl> 55, 455, 506, 531, 534, 550, 555, 555, 555, 600, 60…
## $ sched_dep_time     <dbl> 2115, 500, 511, 535, 545, 600, 600, 555, 600, 600, …
## $ dep_delay          <dbl> 220, -5, -5, -4, -11, -10, -5, 0, -5, 0, -2, 3, 4, …
## $ arr_time           <dbl> 426, 830, 710, 647, 750, 712, 822, 709, 755, 559, 9…
## $ sched_arr_time     <dbl> 2323, 834, 730, 710, 742, 748, 834, 715, 817, 615, …
## $ arr_delay          <dbl> 303, -4, -20, -23, 8, -36, -12, -6, -22, -16, 3, -7…
## $ carrier            <chr> "OH", "YX", "AA", "WN", "B6", "YX", "OO", "WN", "DL…
## $ flight             <dbl> 1019, 954, 185, 203, 310, 790, 803, 295, 348, 218, …
## $ tailnum            <chr> "N567NN", "N433YX", "N853NN", "N227WN", "N203JB", "…
## $ origin             <chr> "DTW", "GRR", "DTW", "DTW", "DTW", "DTW", "FNT", "D…
## $ dest               <chr> "CLT", "MIA", "DFW", "BWI", "BOS", "EWR", "ATL", "D…
## $ air_time           <dbl> 88, 187, 162, 64, 71, 61, 125, 174, 105, 45, 179, 1…
## $ distance           <dbl> 500, 1214, 986, 409, 632, 488, 645, 1123, 640, 228,…
## $ hour               <dbl> 21, 5, 5, 5, 5, 6, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ minute             <dbl> 15, 0, 11, 35, 45, 0, 0, 55, 0, 0, 3, 0, 0, 5, 0, 1…
## $ time_hour          <dttm> 2019-01-01 21:00:00, 2019-01-01 05:00:00, 2019-01-…
## $ temp               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ dewp               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ humid              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ wind_dir           <dbl> 10, 320, 290, 290, 290, 290, 330, 290, 290, 290, 29…
## $ wind_speed         <dbl> 8.05546, 9.20624, 19.56326, 19.56326, 19.56326, 13.…
## $ wind_gust          <dbl> 9.270062, 10.594357, 22.513008, 22.513008, 22.51300…
## $ precip             <dbl> NA, 1e-04, NA, NA, NA, NA, 1e-04, NA, NA, NA, NA, N…
## $ pressure           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ visib              <dbl> 1.25, 9.00, 7.00, 7.00, 7.00, 4.00, 9.00, 7.00, 10.…
## $ carrier_name       <chr> "PSA Airlines Inc.", "Republic Airline", "American …
## $ plane_year         <dbl> 2015, 2014, 2010, 2005, 2006, 2016, 2006, 1999, 200…
## $ plane_type         <chr> "Fixed wing multi engine", "Fixed wing multi engine…
## $ plane_manufacturer <chr> "BOMBARDIER INC", "EMBRAER S A", "BOEING", "BOEING"…
## $ plane_model        <chr> "CL-600-2D24", "ERJ 170-200 LR", "737-823", "737-7H…
## $ plane_engines      <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ plane_seats        <dbl> 95, 88, 162, 140, 20, 88, 95, 149, 100, 140, 162, 1…
## $ plane_speed        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ plane_engine       <chr> "Turbo-fan", "Turbo-fan", "Turbo-fan", "Turbo-fan",…

Bar chart

# Bar chart of the number of flights per origin airport.
# geom_bar() counts the rows for each origin itself.
ggplot(data = miFlights, mapping = aes(x = origin)) +
  geom_bar() +
  labs(
    x = "Origin",
    y = "Number of flights",
    title = "Michigan flights, 2019-2021"
  )

# Same chart from pre-computed counts: count() supplies n, geom_col() draws
# it as given, and reordering by -n puts the busiest origin first.
miFlights %>%
  count(origin) %>%
  ggplot(mapping = aes(x = fct_reorder(origin, -n), y = n)) +
  geom_col() +
  labs(
    x = "Origin",
    y = "Number of flights",
    title = "Michigan flights, 2019-2021"
  )

Using the filter() function

# Flights on January 1st (month 1, day 1); preview the first five rows.
janFlights <- filter(miFlights, month == 1 & day == 1)
slice_head(janFlights, n = 5)
## # A tibble: 5 × 37
##    year month   day dep_time sched_dep…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##   <dbl> <dbl> <dbl>    <dbl>       <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <chr>  
## 1  2019     1     1       55        2115     220     426    2323     303 OH     
## 2  2019     1     1      455         500      -5     830     834      -4 YX     
## 3  2019     1     1      506         511      -5     710     730     -20 AA     
## 4  2019     1     1      531         535      -4     647     710     -23 WN     
## 5  2019     1     1      534         545     -11     750     742       8 B6     
## # … with 27 more variables: flight <dbl>, tailnum <chr>, origin <chr>,
## #   dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## #   time_hour <dttm>, temp <dbl>, dewp <dbl>, humid <dbl>, wind_dir <dbl>,
## #   wind_speed <dbl>, wind_gust <dbl>, precip <dbl>, pressure <dbl>,
## #   visib <dbl>, carrier_name <chr>, plane_year <dbl>, plane_type <chr>,
## #   plane_manufacturer <chr>, plane_model <chr>, plane_engines <dbl>,
## #   plane_seats <dbl>, plane_speed <dbl>, plane_engine <chr>, and abbreviated …
# Flights on December 25th (month 12, day 25); preview the first five rows.
dec25 <- filter(miFlights, month == 12 & day == 25)
slice_head(dec25, n = 5)
## # A tibble: 5 × 37
##    year month   day dep_time sched_dep…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##   <dbl> <dbl> <dbl>    <dbl>       <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <chr>  
## 1  2019    12    25      522         535     -13     709     732     -23 B6     
## 2  2019    12    25      540         545      -5     736     803     -27 DL     
## 3  2019    12    25      552         557      -5     827     812      15 OH     
## 4  2019    12    25      557         600      -3     904     922     -18 F9     
## 5  2019    12    25      557         600      -3     723     734     -11 NK     
## # … with 27 more variables: flight <dbl>, tailnum <chr>, origin <chr>,
## #   dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## #   time_hour <dttm>, temp <dbl>, dewp <dbl>, humid <dbl>, wind_dir <dbl>,
## #   wind_speed <dbl>, wind_gust <dbl>, precip <dbl>, pressure <dbl>,
## #   visib <dbl>, carrier_name <chr>, plane_year <dbl>, plane_type <chr>,
## #   plane_manufacturer <chr>, plane_model <chr>, plane_engines <dbl>,
## #   plane_seats <dbl>, plane_speed <dbl>, plane_engine <chr>, and abbreviated …
# All flights in November or December (two equivalent ways to filter)
novDec <- miFlights %>% filter(month == 11 | month == 12)
novDec <- miFlights %>% filter(month %in% c(11, 12))

# All flights not in November or December (two equivalent ways to filter).
# Stored under its own name so the November/December subset above is not
# overwritten by its complement.
notNovDec <- miFlights %>% filter(month != 11 & month != 12)
notNovDec <- miFlights %>% filter(!(month %in% c(11, 12)))

Bar Chart

# Display name -> airport code, in the new = old form fct_recode() expects
# when the vector is spliced in with !!!.
airports <- c("Detroit Metro Airport" = "DTW",
              "Gerald R. Ford International Airport" = "GRR",
              "Flint Bishop International Airport" = "FNT",
              "Capital Region International Airport" = "LAN")

# Horizontal bar chart of flight counts using the full airport names
miFlights %>%
  count(origin) %>%
  mutate(origin = fct_recode(origin, !!!airports)) %>%
  ggplot(aes(x = fct_reorder(origin, n),
             y = n)) +
  geom_col() +
  # Flip so the long airport names sit on the vertical axis
  coord_flip() +
  labs(title = "Michigan flights, 2019-2021",
       x = "Origin",
       y = "Number of flights",
       caption = "Data Source:anyFlights R package") +
  # Thousands separators on the count axis
  scale_y_continuous(labels = comma) +
  theme_bw()

Missing Values

# Flights whose departure time is missing (dep_time is NA)
miFlightsMiss <- filter(miFlights, is.na(dep_time))

# Flights with a recorded departure time (dep_time is not NA)
miFlightsComplete <- filter(miFlights, !is.na(dep_time))

Using the arrange() function

# Order flights by day (smallest to largest), keep the first four columns
# (year through dep_time), and print the first five rows.

miFlights %>%
  arrange(day) %>%
  select(year:dep_time) %>%
  slice_head(n = 5)
## # A tibble: 5 × 4
##    year month   day dep_time
##   <dbl> <dbl> <dbl>    <dbl>
## 1  2019     1     1       55
## 2  2019     1     1      455
## 3  2019     1     1      506
## 4  2019     1     1      531
## 5  2019     1     1      534
# Order flights by day (largest to smallest), keep the first four columns
# (year through dep_time), and print the first five rows.

miFlights %>%
  arrange(desc(day)) %>%
  select(year:dep_time) %>%
  slice_head(n = 5)
## # A tibble: 5 × 4
##    year month   day dep_time
##   <dbl> <dbl> <dbl>    <dbl>
## 1  2019     1    31       59
## 2  2019     1    31      535
## 3  2019     1    31      540
## 4  2019     1    31      548
## 5  2019     1    31      549
# Chronological ordering: sort by year, then month, then day.
miFlightsSorted <- arrange(miFlights, year, month, day)

You try

# Sort miFlights to find the 3 most delayed flights (arr_delay), and the 3 that
# left the earliest relative to their scheduled departure (dep_delay).

# Earliest departures: ascending dep_delay puts the most negative values first.
miFlights %>%
  arrange(dep_delay) %>%
  select(year, month, day, dep_delay) %>%
  slice_head(n = 3)

# Most delayed arrivals: descending arr_delay puts the largest delays first.
miFlights %>%
  arrange(desc(arr_delay)) %>%
  select(year, month, day, arr_delay) %>%
  slice_head(n = 3)
## # A tibble: 3 × 4
##    year month   day arr_delay
##   <dbl> <dbl> <dbl>     <dbl>
## 1  2019     2    17      2649
## 2  2021     7    12      1961
## 3  2019    12    19      1792
# Three fastest flights: sort descending on speed, computed inline as
# distance / air_time, and keep the top three rows.
miFlights %>%
  arrange(desc(distance / air_time)) %>%
  slice_head(n = 3)
## # A tibble: 3 × 37
##    year month   day dep_time sched_dep…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##   <dbl> <dbl> <dbl>    <dbl>       <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <chr>  
## 1  2019     7    14     1230        1215      15    1339    1326      13 OO     
## 2  2020     8    21     1645        1605      40    1621    1626      -5 OO     
## 3  2021    11    21     1000        1000       0    1119    1132     -13 OO     
## # … with 27 more variables: flight <dbl>, tailnum <chr>, origin <chr>,
## #   dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## #   time_hour <dttm>, temp <dbl>, dewp <dbl>, humid <dbl>, wind_dir <dbl>,
## #   wind_speed <dbl>, wind_gust <dbl>, precip <dbl>, pressure <dbl>,
## #   visib <dbl>, carrier_name <chr>, plane_year <dbl>, plane_type <chr>,
## #   plane_manufacturer <chr>, plane_model <chr>, plane_engines <dbl>,
## #   plane_seats <dbl>, plane_speed <dbl>, plane_engine <chr>, and abbreviated …
# Flights departing GRR, ordered by distance (farthest first) with ties broken
# by arr_time (earliest first); keep the top three rows.
miFlights %>%
  filter(origin == "GRR") %>%
  arrange(desc(distance), arr_time) %>%
  slice_head(n = 3)
## # A tibble: 3 × 37
##    year month   day dep_time sched_dep…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##   <dbl> <dbl> <dbl>    <dbl>       <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <chr>  
## 1  2021     3    28      555         600      -5     714     758     -44 G4     
## 2  2021     4    11      552         600      -8     716     740     -24 G4     
## 3  2021     3    18      558         600      -2     720     758     -38 G4     
## # … with 27 more variables: flight <dbl>, tailnum <chr>, origin <chr>,
## #   dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## #   time_hour <dttm>, temp <dbl>, dewp <dbl>, humid <dbl>, wind_dir <dbl>,
## #   wind_speed <dbl>, wind_gust <dbl>, precip <dbl>, pressure <dbl>,
## #   visib <dbl>, carrier_name <chr>, plane_year <dbl>, plane_type <chr>,
## #   plane_manufacturer <chr>, plane_model <chr>, plane_engines <dbl>,
## #   plane_seats <dbl>, plane_speed <dbl>, plane_engine <chr>, and abbreviated …

Using the select() function

# miDropped: miFlights without the year and month columns.
miDropped <- select(miFlights, -c(year, month))

# miDropped2: miFlights without the columns from year through day (inclusive).
miDropped2 <- select(miFlights, !(year:day))

# select() with everything() moves chosen columns to the front; the negated
# name drops that column in the same call.
rearrangedMIFlights <- select(miFlights, time_hour, air_time, everything(), -day)

You try 2

# timeFlights: only the columns whose names end with "time".
timeFlights <- select(miFlights, ends_with("time"))

# departureInfo: only the columns whose names start with "dep".
departureInfo <- select(miFlights, starts_with("dep"))

# newFlights: flight number, origin and destination first, followed by every
# other column except the tail number (tailnum).
newFlights <- select(miFlights, flight, origin, dest, everything(), -tailnum)

Using the mutate() function

# Narrow table holding the two delay columns plus distance and air time.
flights_sml <- miFlights %>%
  select(ends_with("delay"), distance, air_time)

# gain = departure delay minus arrival delay; preview the first five rows.
flights_sml %>%
  mutate(gain = dep_delay - arr_delay) %>%
  slice_head(n = 5)
## # A tibble: 5 × 5
##   dep_delay arr_delay distance air_time  gain
##       <dbl>     <dbl>    <dbl>    <dbl> <dbl>
## 1       220       303      500       88   -83
## 2        -5        -4     1214      187    -1
## 3        -5       -20      986      162    15
## 4        -4       -23      409       64    19
## 5       -11         8      632       71   -19
# flightSpeeds: flights_sml plus gain and speed (distance per unit of air_time).

flightSpeeds <- flights_sml %>%
  mutate(
    gain = dep_delay - arr_delay,
    speed = distance / air_time
  )

Creating a scatter plot

# Scatter plot of gain against speed, coloured by distance.
# speed is multiplied by 60 to match the per-hour axis label; the low alpha
# keeps overlapping points readable.
flightSpeeds %>%
  ggplot(mapping = aes(x = speed * 60, y = gain, color = distance)) +
  geom_point(alpha = 0.10) +
  labs(
    x = "Speed (miles per hour)",
    y = "Gain (minutes)",
    title = "Michigan Flights gains by speed, 2019 - 2021",
    caption = "Data Source:anyFlights R package"
  ) +
  theme_bw() +
  theme(legend.position = "bottom")

Group-wise operations and statistics with group_by() & summarize

# Mean departure delay for each calendar day, previewing the first five days.
# .groups = "drop" returns an ungrouped tibble; without it summarize() leaves
# the result grouped by year and month, and slice_head() then returns five
# rows per group instead of five rows overall.
miFlights %>%
  group_by(year, month, day) %>%
  summarize(delay = mean(dep_delay, na.rm = TRUE), .groups = "drop") %>%
  slice_head(n = 5)
## # A tibble: 180 × 4
## # Groups:   year, month [36]
##     year month   day  delay
##    <dbl> <dbl> <dbl>  <dbl>
##  1  2019     1     1  8.58 
##  2  2019     1     2 12.5  
##  3  2019     1     3  0.721
##  4  2019     1     4 -0.477
##  5  2019     1     5  2.17 
##  6  2019     2     1 27.8  
##  7  2019     2     2 13.0  
##  8  2019     2     3  1.49 
##  9  2019     2     4  7.91 
## 10  2019     2     5  1.91 
## # … with 170 more rows
# Mean arrival delay per carrier, ignoring flights with a missing arr_delay.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
delaySummary <- miFlights %>%
  group_by(carrier_name) %>%
  summarize(delay = mean(arr_delay, na.rm = TRUE))

Creating a Bar chart using geom_col()

# Bar chart of mean arrival delay per carrier, ordered from lowest to highest
# delay and coloured by whether the mean delay is positive.
delaySummary %>% ggplot(aes(x = fct_reorder(carrier_name, delay),
                            y = delay,
                            fill = delay > 0)) +
  geom_col() +
  labs(title = "Average flight delays by carrier ",
       subtitle = "Michigan flights, 2019 - 2021",
       x = "Carrier",
       y = "Average delay (minutes)",
        caption = "Data Source:anyFlights R package") +
  # Name the colours so each stays tied to its logical value even if only one
  # of TRUE/FALSE is present in the data.
  scale_fill_manual(values = c("FALSE" = "#D55E00", "TRUE" = "#0072B2")) +
  theme_bw() +
  # The fill only encodes the sign of the delay, so the legend is hidden;
  # carrier names are rotated and shrunk to fit along the x axis.
  theme(legend.position = "none",
        text = element_text(face = "bold"),
        axis.text.x = element_text(angle = 45,
                                   size = 8,
                                   vjust = 0.70))

You try 3

# Median arrival delay per calendar month, ignoring missing arr_delay values.
# TRUE is spelled out because the shorthand T can be reassigned.
monthlyDelays <- miFlights %>%
  group_by(month) %>%
  summarize(delay = median(arr_delay, na.rm = TRUE))

# Render the twelve-row summary as a table.
monthlyDelays %>% knitr::kable()
month delay
1 -13
2 -11
3 -12
4 -10
5 -9
6 -7
7 -8
8 -8
9 -10
10 -8
11 -10
12 -10
# Line chart of the median arrival delay by month, one tick per month.
ggplot(data = monthlyDelays, mapping = aes(x = month, y = delay)) +
  geom_line(color = "dodgerblue") +
  scale_x_continuous(breaks = 1:12) +
  labs(
    x = "Month",
    y = "Median delay (minutes)",
    title = "Monthly arrival delays",
    subtitle = "Michigan flights, 2019 - 2021",
    caption = "Data Source:anyFlights R package"
  ) +
  theme_bw()

# Median arrival delay for every month/origin combination, ignoring missing
# arr_delay values. .groups = "drop" returns an ungrouped tibble instead of
# one still grouped by month.
monthlyAirportDelays <- miFlights %>%
  group_by(month, origin) %>%
  summarize(delay = median(arr_delay, na.rm = TRUE), .groups = "drop")

# Render the summary as a table.
monthlyAirportDelays %>% knitr::kable()
month origin delay
1 DTW -13
1 FNT -10
1 GRR -11
1 LAN -12
2 DTW -11
2 FNT -4
2 GRR -6
2 LAN -9
3 DTW -12
3 FNT -11
3 GRR -11
3 LAN -13
4 DTW -10
4 FNT -14
4 GRR -12
4 LAN -12
5 DTW -8
5 FNT -10
5 GRR -10
5 LAN -10
6 DTW -7
6 FNT -7
6 GRR -7
6 LAN -8
7 DTW -8
7 FNT -6
7 GRR -8
7 LAN -8
8 DTW -8
8 FNT -8
8 GRR -8
8 LAN -9
9 DTW -10
9 FNT -10
9 GRR -10
9 LAN -10
10 DTW -8
10 FNT -7
10 GRR -8
10 LAN -9
11 DTW -10
11 FNT -9
11 GRR -9
11 LAN -10
12 DTW -10
12 FNT -7
12 GRR -8
12 LAN -13
# Line chart of the median arrival delay by month, one line per origin airport.
ggplot(
  data = monthlyAirportDelays,
  mapping = aes(x = month, y = delay, color = origin)
) +
  geom_line() +
  scale_x_continuous(breaks = 1:12) +
  labs(
    x = "Month",
    y = "Median delay (minutes)",
    title = "Monthly arrival delays",
    subtitle = "Michigan flights, 2019 - 2021",
    caption = "Data Source:anyFlights R package"
  ) +
  theme_bw()

Bonus (optional): Create a line chart showing the average daily flight delay across time for each of the major airports

# Mean arrival delay for every month/origin combination, ignoring missing
# arr_delay values. .groups = "drop" returns an ungrouped tibble instead of
# one still grouped by month.
# NOTE(review): the exercise heading asks for average *daily* delays across
# time; this aggregates by calendar month (pooled across years) - confirm
# whether a per-date summary was intended.
monthlyAirportDelays1 <- miFlights %>%
  group_by(month, origin) %>%
  summarize(delay = mean(arr_delay, na.rm = TRUE), .groups = "drop")

# Render the summary as a table.
monthlyAirportDelays1 %>% knitr::kable()
month origin delay
1 DTW -2.5621779
1 FNT 2.0756534
1 GRR 3.0469067
1 LAN 3.6784566
2 DTW 0.7469195
2 FNT 10.2602378
2 GRR 7.6872737
2 LAN 11.7045075
3 DTW -5.3431079
3 FNT -1.4900662
3 GRR -1.1165397
3 LAN -0.5157233
4 DTW -2.0344987
4 FNT -4.7945792
4 GRR -2.8542573
4 LAN -0.6356073
5 DTW -0.9764161
5 FNT 3.8609078
5 GRR -2.2899173
5 LAN 1.8093588
6 DTW 3.8633216
6 FNT 9.5192308
6 GRR 6.4788484
6 LAN 8.3279857
7 DTW 5.0758884
7 FNT 6.1802326
7 GRR 6.8374690
7 LAN 10.9069374
8 DTW 2.3602065
8 FNT 5.2003130
8 GRR 3.8260135
8 LAN 1.6283892
9 DTW -3.4418731
9 FNT -0.2061538
9 GRR -2.3189819
9 LAN -0.9334416
10 DTW -1.1352272
10 FNT 1.5730337
10 GRR 3.0774160
10 LAN 2.9658385
11 DTW -0.0162294
11 FNT -1.4349650
11 GRR 1.0087202
11 LAN 2.2409867
12 DTW 0.8812990
12 FNT 5.8241206
12 GRR 4.9704201
12 LAN -2.8902027
# Line chart of the mean arrival delay by month, one line per origin airport.
# NOTE(review): the title says "daily" but the x axis is month - confirm the
# intended granularity.
ggplot(
  data = monthlyAirportDelays1,
  mapping = aes(x = month, y = delay, color = origin)
) +
  geom_line() +
  scale_x_continuous(breaks = 1:12) +
  labs(
    x = "Month",
    y = "Mean delay (minutes)",
    title = "Average daily flight delays",
    subtitle = "Michigan flights, 2019 - 2021",
    caption = "Data Source:anyFlights R package"
  ) +
  theme_bw()