NYC Flights Homework

Author

Steve Donfack

Load the packages

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights23)

# Load the flight records and the carrier lookup table in a single call,
# then preview the first rows of the lookup table.
data(flights, airlines)
airlines |> head()
# A tibble: 6 × 2
  carrier name                  
  <chr>   <chr>                 
1 9E      Endeavor Air Inc.     
2 AA      American Airlines Inc.
3 AS      Alaska Airlines Inc.  
4 B6      JetBlue Airways       
5 DL      Delta Air Lines Inc.  
6 F9      Frontier Airlines Inc.
# Preview the first rows of the flights table.
flights |> head()
# A tibble: 6 × 19
   year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
  <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
1  2023     1     1        1           2038       203      328              3
2  2023     1     1       18           2300        78      228            135
3  2023     1     1       31           2344        47      500            426
4  2023     1     1       33           2140       173      238           2352
5  2023     1     1       36           2048       228      223           2252
6  2023     1     1      503            500         3      808            815
# ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
#   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
#   hour <dbl>, minute <dbl>, time_hour <dttm>

Let’s first create a new data frame that keeps only Spirit Airlines flights from the first six months of the year with a departure delay greater than or equal to sixty minutes.

# Spirit Airlines (carrier code "NK") flights from January through June that
# departed at least 60 minutes late.
#
# `month` and `dep_delay` are numeric columns, so they are compared against
# numeric values. Comparing a number with the string "60" makes R coerce the
# number to character and compare lexicographically ("9" >= "60" is TRUE),
# which lets short delays such as 7 or 9 minutes slip through the filter.
spirit_airlines <- flights |>
  filter(
    carrier == "NK",
    month %in% 1:6,
    dep_delay >= 60
  ) |>
  select(year, day, dep_delay, carrier, month)
head(spirit_airlines)
# A tibble: 6 × 5
   year   day dep_delay carrier month
  <int> <int>     <dbl> <chr>   <int>
1  2023     1        94 NK          1
2  2023     1         9 NK          1
3  2023     1        65 NK          1
4  2023     2         7 NK          1
5  2023     2        86 NK          1
6  2023     2        60 NK          1

Let’s graph it

  # Bar chart of departure delay by day of month, one colour per month.
  # `month` is an integer column: mapped directly to `fill` it yields a
  # continuous colour scale and no grouping, so position = "dodge" has nothing
  # to dodge by. Converting it to a factor gives one discrete, dodged series
  # per month.
  # NOTE: no aggregation is done here, so every flight is drawn as its own
  # bar; flights sharing a day and month are drawn on top of each other.
  ggplot(spirit_airlines, aes(x = day, y = dep_delay, fill = factor(month))) +
    geom_col(position = "dodge") +
    labs(
      x = "Days",
      y = "Delay time in minutes",
      # Label the aesthetic that is actually mapped (fill); the plot has no
      # size aesthetic, so a `size` label would never be displayed.
      fill = "Month",
      title = "Flight delays over the first six months of 2023 for Spirit Airlines"
    )