library(tidyverse)
library(lubridate)
library(knitr)
library(skimr)
library(scales)
library(readr)

# Load the Michigan flights data (2019-2021); the path is relative to the
# working directory.
miFlights <- read_csv("miFlights2019-2021.csv")

# Column-by-column overview: types, missingness and summary statistics.
skim(miFlights)
| Name | miFlights |
| Number of rows | 463818 |
| Number of columns | 37 |
| _______________________ | |
| Column type frequency: | |
| character | 9 |
| numeric | 27 |
| POSIXct | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| carrier | 0 | 1.00 | 2 | 2 | 0 | 16 | 0 |
| tailnum | 2189 | 1.00 | 3 | 6 | 0 | 5250 | 0 |
| origin | 0 | 1.00 | 3 | 3 | 0 | 4 | 0 |
| dest | 0 | 1.00 | 3 | 3 | 0 | 130 | 0 |
| carrier_name | 0 | 1.00 | 9 | 34 | 0 | 16 | 0 |
| plane_type | 11140 | 0.98 | 23 | 23 | 0 | 1 | 0 |
| plane_manufacturer | 11140 | 0.98 | 6 | 29 | 0 | 16 | 0 |
| plane_model | 11140 | 0.98 | 5 | 15 | 0 | 93 | 0 |
| plane_engine | 11140 | 0.98 | 9 | 9 | 0 | 2 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| year | 0 | 1.00 | 2019.92 | 0.85 | 2019.00 | 2019.00 | 2020.00 | 2021.00 | 2021.00 | ▇▁▆▁▆ |
| month | 0 | 1.00 | 6.53 | 3.48 | 1.00 | 3.00 | 7.00 | 10.00 | 12.00 | ▇▅▅▅▇ |
| day | 0 | 1.00 | 15.74 | 8.76 | 1.00 | 8.00 | 16.00 | 23.00 | 31.00 | ▇▇▇▇▆ |
| dep_time | 9060 | 0.98 | 1372.26 | 490.52 | 1.00 | 950.00 | 1355.00 | 1754.00 | 2400.00 | ▁▇▇▇▆ |
| sched_dep_time | 0 | 1.00 | 1368.40 | 481.75 | 49.00 | 948.00 | 1355.00 | 1750.00 | 2336.00 | ▁▇▇▇▆ |
| dep_delay | 9063 | 0.98 | 7.11 | 44.97 | -54.00 | -5.00 | -3.00 | 0.00 | 2672.00 | ▇▁▁▁▁ |
| arr_time | 9324 | 0.98 | 1481.01 | 506.74 | 1.00 | 1053.00 | 1502.00 | 1905.00 | 2400.00 | ▁▅▇▇▆ |
| sched_arr_time | 0 | 1.00 | 1496.79 | 495.16 | 1.00 | 1103.00 | 1510.00 | 1910.00 | 2359.00 | ▁▃▇▇▇ |
| arr_delay | 10239 | 0.98 | 0.16 | 47.21 | -85.00 | -17.00 | -9.00 | 2.00 | 2649.00 | ▇▁▁▁▁ |
| flight | 0 | 1.00 | 413.37 | 269.57 | 1.00 | 189.00 | 387.00 | 600.00 | 1322.00 | ▇▇▆▂▁ |
| air_time | 10239 | 0.98 | 94.59 | 63.12 | 15.00 | 50.00 | 74.00 | 130.00 | 581.00 | ▇▂▁▁▁ |
| distance | 0 | 1.00 | 641.00 | 488.23 | 74.00 | 296.00 | 500.00 | 957.00 | 4475.00 | ▇▂▁▁▁ |
| hour | 0 | 1.00 | 13.41 | 4.79 | 0.00 | 9.00 | 13.00 | 17.00 | 23.00 | ▁▇▇▇▆ |
| minute | 0 | 1.00 | 27.49 | 17.94 | 0.00 | 11.00 | 27.00 | 44.00 | 59.00 | ▇▆▇▆▆ |
| temp | 441760 | 0.05 | 42.16 | 15.40 | -4.00 | 32.00 | 37.90 | 48.90 | 90.00 | ▁▆▇▂▁ |
| dewp | 441762 | 0.05 | 31.91 | 13.42 | -9.00 | 23.00 | 28.90 | 39.90 | 75.90 | ▁▆▇▃▁ |
| humid | 441773 | 0.05 | 68.87 | 15.10 | 25.87 | 57.93 | 71.82 | 80.66 | 100.00 | ▁▃▅▇▃ |
| wind_dir | 9205 | 0.98 | 181.02 | 109.46 | 0.00 | 80.00 | 200.00 | 270.00 | 360.00 | ▇▃▆▇▆ |
| wind_speed | 4367 | 0.99 | 8.59 | 5.64 | 0.00 | 4.60 | 8.06 | 11.51 | 42.58 | ▇▆▁▁▁ |
| wind_gust | 4367 | 0.99 | 9.88 | 6.50 | 0.00 | 5.30 | 9.27 | 13.24 | 49.00 | ▇▆▁▁▁ |
| precip | 430846 | 0.07 | 0.01 | 0.02 | 0.00 | 0.00 | 0.00 | 0.01 | 0.44 | ▇▁▁▁▁ |
| pressure | 447131 | 0.04 | 1018.83 | 7.60 | 990.40 | 1014.10 | 1019.00 | 1023.40 | 1038.50 | ▁▂▇▇▂ |
| visib | 1934 | 1.00 | 8.18 | 2.56 | 0.06 | 7.00 | 10.00 | 10.00 | 10.00 | ▁▁▁▂▇ |
| plane_year | 21647 | 0.95 | 2008.12 | 7.15 | 1987.00 | 2003.00 | 2007.00 | 2015.00 | 2021.00 | ▁▂▇▃▅ |
| plane_engines | 11140 | 0.98 | 2.00 | 0.02 | 2.00 | 2.00 | 2.00 | 2.00 | 3.00 | ▇▁▁▁▁ |
| plane_seats | 11140 | 0.98 | 127.86 | 66.68 | 20.00 | 80.00 | 95.00 | 182.00 | 451.00 | ▇▃▂▁▁ |
| plane_speed | 11140 | 0.98 | 0.01 | 1.72 | 0.00 | 0.00 | 0.00 | 0.00 | 438.00 | ▇▁▁▁▁ |
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| time_hour | 0 | 1 | 2019-01-01 05:00:00 | 2021-12-31 22:00:00 | 2020-03-26 06:00:00 | 19059 |
# Structural preview: one line per column showing its type and first values.
dplyr::glimpse(x = miFlights)
## Rows: 463,818
## Columns: 37
## $ year <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 201…
## $ month <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ day <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ dep_time <dbl> 55, 455, 506, 531, 534, 550, 555, 555, 555, 600, 60…
## $ sched_dep_time <dbl> 2115, 500, 511, 535, 545, 600, 600, 555, 600, 600, …
## $ dep_delay <dbl> 220, -5, -5, -4, -11, -10, -5, 0, -5, 0, -2, 3, 4, …
## $ arr_time <dbl> 426, 830, 710, 647, 750, 712, 822, 709, 755, 559, 9…
## $ sched_arr_time <dbl> 2323, 834, 730, 710, 742, 748, 834, 715, 817, 615, …
## $ arr_delay <dbl> 303, -4, -20, -23, 8, -36, -12, -6, -22, -16, 3, -7…
## $ carrier <chr> "OH", "YX", "AA", "WN", "B6", "YX", "OO", "WN", "DL…
## $ flight <dbl> 1019, 954, 185, 203, 310, 790, 803, 295, 348, 218, …
## $ tailnum <chr> "N567NN", "N433YX", "N853NN", "N227WN", "N203JB", "…
## $ origin <chr> "DTW", "GRR", "DTW", "DTW", "DTW", "DTW", "FNT", "D…
## $ dest <chr> "CLT", "MIA", "DFW", "BWI", "BOS", "EWR", "ATL", "D…
## $ air_time <dbl> 88, 187, 162, 64, 71, 61, 125, 174, 105, 45, 179, 1…
## $ distance <dbl> 500, 1214, 986, 409, 632, 488, 645, 1123, 640, 228,…
## $ hour <dbl> 21, 5, 5, 5, 5, 6, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ minute <dbl> 15, 0, 11, 35, 45, 0, 0, 55, 0, 0, 3, 0, 0, 5, 0, 1…
## $ time_hour <dttm> 2019-01-01 21:00:00, 2019-01-01 05:00:00, 2019-01-…
## $ temp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ dewp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ humid <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ wind_dir <dbl> 10, 320, 290, 290, 290, 290, 330, 290, 290, 290, 29…
## $ wind_speed <dbl> 8.05546, 9.20624, 19.56326, 19.56326, 19.56326, 13.…
## $ wind_gust <dbl> 9.270062, 10.594357, 22.513008, 22.513008, 22.51300…
## $ precip <dbl> NA, 1e-04, NA, NA, NA, NA, 1e-04, NA, NA, NA, NA, N…
## $ pressure <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ visib <dbl> 1.25, 9.00, 7.00, 7.00, 7.00, 4.00, 9.00, 7.00, 10.…
## $ carrier_name <chr> "PSA Airlines Inc.", "Republic Airline", "American …
## $ plane_year <dbl> 2015, 2014, 2010, 2005, 2006, 2016, 2006, 1999, 200…
## $ plane_type <chr> "Fixed wing multi engine", "Fixed wing multi engine…
## $ plane_manufacturer <chr> "BOMBARDIER INC", "EMBRAER S A", "BOEING", "BOEING"…
## $ plane_model <chr> "CL-600-2D24", "ERJ 170-200 LR", "737-823", "737-7H…
## $ plane_engines <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ plane_seats <dbl> 95, 88, 162, 140, 20, 88, 95, 149, 100, 140, 162, 1…
## $ plane_speed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ plane_engine <chr> "Turbo-fan", "Turbo-fan", "Turbo-fan", "Turbo-fan",…
# Bar chart of the number of flights per origin airport.
# geom_bar() counts the rows for each origin itself, so no y aesthetic is given.
ggplot(data = miFlights, mapping = aes(x = origin)) +
  geom_bar() +
  labs(
    title = "Michigan flights, 2019-2021",
    x = "Origin",
    y = "Number of flights"
  )
# Making a Bar chart with geom_col()
# Counts are pre-computed with count(), so geom_col() plots n directly;
# fct_reorder(origin, -n) orders the bars from most to fewest flights.
miFlights %>%
  count(origin) %>%
  ggplot(aes(x = fct_reorder(origin, -n), y = n)) +
  geom_col() +
  labs(title = "Michigan flights, 2019-2021",
       x = "Origin",
       y = "Number of flights")

# Flights on January 1st (the filter does not restrict the year).
janFlights <- miFlights %>% filter(month == 1, day == 1)
janFlights %>% slice_head(n = 5)
## # A tibble: 5 × 37
## year month day dep_time sched_dep…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2019 1 1 55 2115 220 426 2323 303 OH
## 2 2019 1 1 455 500 -5 830 834 -4 YX
## 3 2019 1 1 506 511 -5 710 730 -20 AA
## 4 2019 1 1 531 535 -4 647 710 -23 WN
## 5 2019 1 1 534 545 -11 750 742 8 B6
## # … with 27 more variables: flight <dbl>, tailnum <chr>, origin <chr>,
## # dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>, temp <dbl>, dewp <dbl>, humid <dbl>, wind_dir <dbl>,
## # wind_speed <dbl>, wind_gust <dbl>, precip <dbl>, pressure <dbl>,
## # visib <dbl>, carrier_name <chr>, plane_year <dbl>, plane_type <chr>,
## # plane_manufacturer <chr>, plane_model <chr>, plane_engines <dbl>,
## # plane_seats <dbl>, plane_speed <dbl>, plane_engine <chr>, and abbreviated …
# Flights on December 25th (the filter does not restrict the year).
dec25 <- miFlights %>%
  filter(month == 12 & day == 25)
# Preview the first five rows.
dec25 %>%
  slice_head(n = 5)
## # A tibble: 5 × 37
## year month day dep_time sched_dep…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2019 12 25 522 535 -13 709 732 -23 B6
## 2 2019 12 25 540 545 -5 736 803 -27 DL
## 3 2019 12 25 552 557 -5 827 812 15 OH
## 4 2019 12 25 557 600 -3 904 922 -18 F9
## 5 2019 12 25 557 600 -3 723 734 -11 NK
## # … with 27 more variables: flight <dbl>, tailnum <chr>, origin <chr>,
## # dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>, temp <dbl>, dewp <dbl>, humid <dbl>, wind_dir <dbl>,
## # wind_speed <dbl>, wind_gust <dbl>, precip <dbl>, pressure <dbl>,
## # visib <dbl>, carrier_name <chr>, plane_year <dbl>, plane_type <chr>,
## # plane_manufacturer <chr>, plane_model <chr>, plane_engines <dbl>,
## # plane_seats <dbl>, plane_speed <dbl>, plane_engine <chr>, and abbreviated …
# All flights in November or December (two equivalent ways to write the filter).
novDec <- miFlights %>% filter(month == 11 | month == 12)
novDec <- miFlights %>% filter(month %in% c(11, 12))

# All flights not in November or December. The two forms agree here because
# month has no missing values. The result is stored under its own name so it
# no longer overwrites novDec with the opposite of what that name says.
notNovDec <- miFlights %>% filter(month != 11 & month != 12)
notNovDec <- miFlights %>% filter(!(month %in% c(11, 12)))

# Display names for the origin airport codes, written as
# "new label" = "existing level" for use with fct_recode().
airports <- c("Detroit Metro Airport" = "DTW",
              "Gerald R. Ford International Airport" = "GRR",
              "Flint Bishop International Airport" = "FNT",
              "Capital Region International Airport" = "LAN")
# Horizontal bar chart of flights per origin airport: codes are recoded to
# full names through the `airports` lookup and bars are ordered by count.
miFlights %>%
  count(origin) %>%
  mutate(origin = fct_recode(origin, !!!airports)) %>%
  ggplot(mapping = aes(x = fct_reorder(origin, n), y = n)) +
  geom_col() +
  scale_y_continuous(labels = comma) +
  coord_flip() +
  theme_bw() +
  labs(
    title = "Michigan flights, 2019-2021",
    x = "Origin",
    y = "Number of flights",
    caption = "Data Source:anyFlights R package"
  )
# All missing departure time flights
# Rows whose departure time is NA.
miFlightsMiss <- filter(miFlights, is.na(dep_time))
# Rows whose departure time is present.
miFlightsComplete <- filter(miFlights, !is.na(dep_time))
# Sort miFlights by the day of the flight (smallest to largest), and print the first 4 columns and 5 rows of the resulting data set using the slice_head() function.
# Ascending sort on day, then keep columns year through dep_time and five rows.
miFlights %>%
  arrange(day) %>%
  select(year:dep_time) %>%
  slice_head(n = 5)
## # A tibble: 5 × 4
## year month day dep_time
## <dbl> <dbl> <dbl> <dbl>
## 1 2019 1 1 55
## 2 2019 1 1 455
## 3 2019 1 1 506
## 4 2019 1 1 531
## 5 2019 1 1 534
# Sort miFlights by the day of the flight (largest to smallest), and print the first 4 columns and 5 rows of the resulting data set using the slice_head() function.
miFlights %>%
  arrange(desc(day)) %>%
  select(year:dep_time) %>%
  slice_head(n = 5)
## # A tibble: 5 × 4
## year month day dep_time
## <dbl> <dbl> <dbl> <dbl>
## 1 2019 1 31 59
## 2 2019 1 31 535
## 3 2019 1 31 540
## 4 2019 1 31 548
## 5 2019 1 31 549
# Sort miFlights by the year, month, and day of the flight.
miFlightsSorted <- miFlights %>% arrange(year, month, day)

# Sort miFlights to find the 3 most delayed flights (arr_delay), and the 3 that left the earliest relative to their scheduled departure (dep_delay).
# Most delayed arrivals: largest arr_delay first.
miFlights %>%
  arrange(desc(arr_delay)) %>%
  select(year, month, day, arr_delay) %>%
  slice_head(n = 3)
## # A tibble: 3 × 4
## year month day arr_delay
## <dbl> <dbl> <dbl> <dbl>
## 1 2019 2 17 2649
## 2 2021 7 12 1961
## 3 2019 12 19 1792

# Earliest departures relative to schedule: most negative dep_delay first.
# arrange() sorts missing values to the end, so flights without a recorded
# dep_delay cannot land in the top 3.
miFlights %>%
  arrange(dep_delay) %>%
  select(year, month, day, dep_delay) %>%
  slice_head(n = 3)
# Fastest flights: order by speed (distance / air_time), highest first, and
# keep the top three rows with all columns.
miFlights %>%
  arrange(desc(distance / air_time)) %>%
  slice_head(n = 3)
## # A tibble: 3 × 37
## year month day dep_time sched_dep…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2019 7 14 1230 1215 15 1339 1326 13 OO
## 2 2020 8 21 1645 1605 40 1621 1626 -5 OO
## 3 2021 11 21 1000 1000 0 1119 1132 -13 OO
## # … with 27 more variables: flight <dbl>, tailnum <chr>, origin <chr>,
## # dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>, temp <dbl>, dewp <dbl>, humid <dbl>, wind_dir <dbl>,
## # wind_speed <dbl>, wind_gust <dbl>, precip <dbl>, pressure <dbl>,
## # visib <dbl>, carrier_name <chr>, plane_year <dbl>, plane_type <chr>,
## # plane_manufacturer <chr>, plane_model <chr>, plane_engines <dbl>,
## # plane_seats <dbl>, plane_speed <dbl>, plane_engine <chr>, and abbreviated …
# For flights out of GRR: sort by distance (longest first); arr_time is the
# secondary key, so it only orders flights that share the same distance.
miFlights %>%
  filter(origin == "GRR") %>%
  arrange(desc(distance), arr_time) %>%
  slice_head(n = 3)
## # A tibble: 3 × 37
## year month day dep_time sched_dep…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2021 3 28 555 600 -5 714 758 -44 G4
## 2 2021 4 11 552 600 -8 716 740 -24 G4
## 3 2021 3 18 558 600 -2 720 758 -38 G4
## # … with 27 more variables: flight <dbl>, tailnum <chr>, origin <chr>,
## # dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>, temp <dbl>, dewp <dbl>, humid <dbl>, wind_dir <dbl>,
## # wind_speed <dbl>, wind_gust <dbl>, precip <dbl>, pressure <dbl>,
## # visib <dbl>, carrier_name <chr>, plane_year <dbl>, plane_type <chr>,
## # plane_manufacturer <chr>, plane_model <chr>, plane_engines <dbl>,
## # plane_seats <dbl>, plane_speed <dbl>, plane_engine <chr>, and abbreviated …
# Drop the year and month columns from miFlights, creating a new data set called miDropped.
miDropped <- select(miFlights, -c(year, month))

# Drop every column from year through day (inclusive), creating miDropped2.
miDropped2 <- select(miFlights, -c(year:day))

# select() with everything() can move columns to the front and still drop others:
# time_hour and air_time come first, then all remaining columns except day.
rearrangedMIFlights <- select(miFlights, time_hour, air_time, everything(), -day)

# timeFlights: only the variables whose names end with "time".
timeFlights <- select(miFlights, ends_with("time"))

# departureInfo: only the variables whose names start with "dep".
departureInfo <- select(miFlights, starts_with("dep"))

# newFlights: flight number, origin and destination first, then every other
# column except the tail number (tailnum).
newFlights <- select(miFlights, flight, origin, dest, everything(), -tailnum)
# creating a new gain variable
# Keep only the columns ending in "delay" plus distance and air_time.
flights_sml <- select(miFlights, ends_with("delay"), distance, air_time)

# gain is departure delay minus arrival delay (positive when the arrival
# delay is smaller than the departure delay).
flights_sml %>%
  mutate(gain = dep_delay - arr_delay) %>%
  slice_head(n = 5)
## # A tibble: 5 × 5
## dep_delay arr_delay distance air_time gain
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 220 303 500 88 -83
## 2 -5 -4 1214 187 -1
## 3 -5 -20 986 162 15
## 4 -4 -23 409 64 19
## 5 -11 8 632 71 -19
# Creating a flightSpeeds data set which has speed as a feature
# (speed = distance / air_time).
flightSpeeds <- flights_sml %>%
  mutate(gain = dep_delay - arr_delay,
         speed = distance / air_time)

# Gain against speed, coloured by distance. speed * 60 rescales the rate to
# per-hour units (assumes air_time is in minutes, as the axis label implies).
flightSpeeds %>%
  ggplot(aes(x = speed * 60,
             y = gain,
             color = distance)) +
  geom_point(alpha = 0.10) +
  labs(title = "Michigan Flights gains by speed, 2019 - 2021",
       x = "Speed (miles per hour)",
       y = "Gain (minutes)",
       caption = "Data Source:anyFlights R package") +
  theme_bw() +
  theme(legend.position = "bottom")

# Mean departure delay per calendar day, showing the first five days.
# summarize() only removes the last grouping variable, so without
# .groups = "drop" the result stays grouped by year and month and
# slice_head(n = 5) returns 5 rows per group (180 rows) rather than 5 rows.
miFlights %>%
  group_by(year, month, day) %>%
  summarize(delay = mean(dep_delay, na.rm = TRUE), .groups = "drop") %>%
  slice_head(n = 5)
## # A tibble: 5 × 4
## year month day delay
## <dbl> <dbl> <dbl> <dbl>
## 1 2019 1 1 8.58
## 2 2019 1 2 12.5
## 3 2019 1 3 0.721
## 4 2019 1 4 -0.477
## 5 2019 1 5 2.17
# Mean arrival delay per carrier, ignoring flights with a missing arr_delay.
delaySummary <- miFlights %>%
  group_by(carrier_name) %>%
  summarize(delay = mean(arr_delay, na.rm = TRUE))

# Bar chart of the carrier means, ordered by delay; the fill colour
# distinguishes carriers with a positive mean delay from the rest.
delaySummary %>%
  ggplot(aes(x = fct_reorder(carrier_name, delay),
             y = delay,
             fill = delay > 0)) +
  geom_col() +
  labs(title = "Average flight delays by carrier ",
       subtitle = "Michigan flights, 2019 - 2021",
       x = "Carrier",
       y = "Average delay (minutes)",
       caption = "Data Source:anyFlights R package") +
  scale_fill_manual(values = c("#D55E00", "#0072B2")) +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(face = "bold"),
        # Tilt the carrier names so the long labels do not overlap.
        axis.text.x = element_text(angle = 45,
                                   size = 8,
                                   vjust = 0.70))

# Median arrival delay per month, printed as a table.
monthlyDelays <- miFlights %>%
  group_by(month) %>%
  summarize(delay = median(arr_delay, na.rm = TRUE))
monthlyDelays %>% knitr::kable()
| month | delay |
|---|---|
| 1 | -13 |
| 2 | -11 |
| 3 | -12 |
| 4 | -10 |
| 5 | -9 |
| 6 | -7 |
| 7 | -8 |
| 8 | -8 |
| 9 | -10 |
| 10 | -8 |
| 11 | -10 |
| 12 | -10 |
# creating a line chart
# Median arrival delay by month, with one axis break per month.
monthlyDelays %>%
  ggplot(aes(x = month,
             y = delay)) +
  geom_line(color = "dodgerblue") +
  labs(title = "Monthly arrival delays",
       subtitle = "Michigan flights, 2019 - 2021",
       x = "Month",
       y = "Median delay (minutes)",
       caption = "Data Source:anyFlights R package") +
  scale_x_continuous(breaks = 1:12) +
  theme_bw()

# Median arrival delay for each month / origin airport combination.
# .groups = "drop" returns an ungrouped tibble; otherwise the result would
# remain grouped by month.
monthlyAirportDelays <- miFlights %>%
  group_by(month, origin) %>%
  summarize(delay = median(arr_delay, na.rm = TRUE), .groups = "drop")
monthlyAirportDelays %>% knitr::kable()
| month | origin | delay |
|---|---|---|
| 1 | DTW | -13 |
| 1 | FNT | -10 |
| 1 | GRR | -11 |
| 1 | LAN | -12 |
| 2 | DTW | -11 |
| 2 | FNT | -4 |
| 2 | GRR | -6 |
| 2 | LAN | -9 |
| 3 | DTW | -12 |
| 3 | FNT | -11 |
| 3 | GRR | -11 |
| 3 | LAN | -13 |
| 4 | DTW | -10 |
| 4 | FNT | -14 |
| 4 | GRR | -12 |
| 4 | LAN | -12 |
| 5 | DTW | -8 |
| 5 | FNT | -10 |
| 5 | GRR | -10 |
| 5 | LAN | -10 |
| 6 | DTW | -7 |
| 6 | FNT | -7 |
| 6 | GRR | -7 |
| 6 | LAN | -8 |
| 7 | DTW | -8 |
| 7 | FNT | -6 |
| 7 | GRR | -8 |
| 7 | LAN | -8 |
| 8 | DTW | -8 |
| 8 | FNT | -8 |
| 8 | GRR | -8 |
| 8 | LAN | -9 |
| 9 | DTW | -10 |
| 9 | FNT | -10 |
| 9 | GRR | -10 |
| 9 | LAN | -10 |
| 10 | DTW | -8 |
| 10 | FNT | -7 |
| 10 | GRR | -8 |
| 10 | LAN | -9 |
| 11 | DTW | -10 |
| 11 | FNT | -9 |
| 11 | GRR | -9 |
| 11 | LAN | -10 |
| 12 | DTW | -10 |
| 12 | FNT | -7 |
| 12 | GRR | -8 |
| 12 | LAN | -13 |
# creating a line chart
# Median arrival delay by month, one line per origin airport.
monthlyAirportDelays %>%
  ggplot(aes(x = month,
             y = delay,
             color = origin)) +
  geom_line() +
  labs(title = "Monthly arrival delays",
       subtitle = "Michigan flights, 2019 - 2021",
       x = "Month",
       y = "Median delay (minutes)",
       caption = "Data Source:anyFlights R package") +
  scale_x_continuous(breaks = 1:12) +
  theme_bw()

# Same summary using the mean instead of the median.
# .groups = "drop" returns an ungrouped tibble; otherwise the result would
# remain grouped by month.
monthlyAirportDelays1 <- miFlights %>%
  group_by(month, origin) %>%
  summarize(delay = mean(arr_delay, na.rm = TRUE), .groups = "drop")
monthlyAirportDelays1 %>% knitr::kable()
| month | origin | delay |
|---|---|---|
| 1 | DTW | -2.5621779 |
| 1 | FNT | 2.0756534 |
| 1 | GRR | 3.0469067 |
| 1 | LAN | 3.6784566 |
| 2 | DTW | 0.7469195 |
| 2 | FNT | 10.2602378 |
| 2 | GRR | 7.6872737 |
| 2 | LAN | 11.7045075 |
| 3 | DTW | -5.3431079 |
| 3 | FNT | -1.4900662 |
| 3 | GRR | -1.1165397 |
| 3 | LAN | -0.5157233 |
| 4 | DTW | -2.0344987 |
| 4 | FNT | -4.7945792 |
| 4 | GRR | -2.8542573 |
| 4 | LAN | -0.6356073 |
| 5 | DTW | -0.9764161 |
| 5 | FNT | 3.8609078 |
| 5 | GRR | -2.2899173 |
| 5 | LAN | 1.8093588 |
| 6 | DTW | 3.8633216 |
| 6 | FNT | 9.5192308 |
| 6 | GRR | 6.4788484 |
| 6 | LAN | 8.3279857 |
| 7 | DTW | 5.0758884 |
| 7 | FNT | 6.1802326 |
| 7 | GRR | 6.8374690 |
| 7 | LAN | 10.9069374 |
| 8 | DTW | 2.3602065 |
| 8 | FNT | 5.2003130 |
| 8 | GRR | 3.8260135 |
| 8 | LAN | 1.6283892 |
| 9 | DTW | -3.4418731 |
| 9 | FNT | -0.2061538 |
| 9 | GRR | -2.3189819 |
| 9 | LAN | -0.9334416 |
| 10 | DTW | -1.1352272 |
| 10 | FNT | 1.5730337 |
| 10 | GRR | 3.0774160 |
| 10 | LAN | 2.9658385 |
| 11 | DTW | -0.0162294 |
| 11 | FNT | -1.4349650 |
| 11 | GRR | 1.0087202 |
| 11 | LAN | 2.2409867 |
| 12 | DTW | 0.8812990 |
| 12 | FNT | 5.8241206 |
| 12 | GRR | 4.9704201 |
| 12 | LAN | -2.8902027 |
# creating a line chart
# Mean arrival delay by month, one line per origin airport. The title says
# "monthly" because the plotted values are per-month means of arr_delay,
# not daily figures.
monthlyAirportDelays1 %>%
  ggplot(aes(x = month,
             y = delay,
             color = origin)) +
  geom_line() +
  labs(title = "Average monthly arrival delays",
       subtitle = "Michigan flights, 2019 - 2021",
       x = "Month",
       y = "Mean delay (minutes)",
       caption = "Data Source:anyFlights R package") +
  scale_x_continuous(breaks = 1:12) +
  theme_bw()