NYC Flights homework

Author

Ike C

Load the Dataset

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Attach the 2023 NYC flights package and load the two tables used below.
library(nycflights23)
data(flights, airlines)

Overview of the data (missing values and summary statistics)

library(dplyr)
# Does the flights table contain at least one missing value?
anyNA(flights)
[1] TRUE
# Per-column summary statistics (numeric columns also report their NA counts).
flights |> summary()
      year          month             day           dep_time     sched_dep_time
 Min.   :2023   Min.   : 1.000   Min.   : 1.00   Min.   :   1    Min.   : 500  
 1st Qu.:2023   1st Qu.: 3.000   1st Qu.: 8.00   1st Qu.: 931    1st Qu.: 930  
 Median :2023   Median : 6.000   Median :16.00   Median :1357    Median :1359  
 Mean   :2023   Mean   : 6.423   Mean   :15.74   Mean   :1366    Mean   :1364  
 3rd Qu.:2023   3rd Qu.: 9.000   3rd Qu.:23.00   3rd Qu.:1804    3rd Qu.:1759  
 Max.   :2023   Max.   :12.000   Max.   :31.00   Max.   :2400    Max.   :2359  
                                                 NA's   :10738                 
   dep_delay          arr_time     sched_arr_time   arr_delay       
 Min.   : -50.00   Min.   :   1    Min.   :   1   Min.   : -97.000  
 1st Qu.:  -6.00   1st Qu.:1105    1st Qu.:1135   1st Qu.: -22.000  
 Median :  -2.00   Median :1519    Median :1551   Median : -10.000  
 Mean   :  13.84   Mean   :1497    Mean   :1552   Mean   :   4.345  
 3rd Qu.:  10.00   3rd Qu.:1946    3rd Qu.:2007   3rd Qu.:   9.000  
 Max.   :1813.00   Max.   :2400    Max.   :2359   Max.   :1812.000  
 NA's   :10738     NA's   :11453                  NA's   :12534     
   carrier              flight         tailnum             origin         
 Length:435352      Min.   :   1.0   Length:435352      Length:435352     
 Class :character   1st Qu.: 364.0   Class :character   Class :character  
 Mode  :character   Median : 734.0   Mode  :character   Mode  :character  
                    Mean   : 785.2                                        
                    3rd Qu.:1188.0                                        
                    Max.   :1972.0                                        
                                                                          
     dest              air_time        distance           hour      
 Length:435352      Min.   : 18.0   Min.   :  80.0   Min.   : 5.00  
 Class :character   1st Qu.: 77.0   1st Qu.: 479.0   1st Qu.: 9.00  
 Mode  :character   Median :121.0   Median : 762.0   Median :13.00  
                    Mean   :141.8   Mean   : 977.5   Mean   :13.35  
                    3rd Qu.:177.0   3rd Qu.:1182.0   3rd Qu.:17.00  
                    Max.   :701.0   Max.   :4983.0   Max.   :23.00  
                    NA's   :12534                                   
     minute        time_hour                     
 Min.   : 0.00   Min.   :2023-01-01 05:00:00.00  
 1st Qu.:10.00   1st Qu.:2023-03-30 20:00:00.00  
 Median :29.00   Median :2023-06-27 08:00:00.00  
 Mean   :28.53   Mean   :2023-06-29 10:02:22.39  
 3rd Qu.:45.00   3rd Qu.:2023-09-27 11:00:00.00  
 Max.   :59.00   Max.   :2023-12-31 23:00:00.00  
                                                 

Carriers with the most flights from JFK to LAX, and their average arrival delay

# JFK -> LAX flights per carrier: number of flights and mean arrival delay
# (missing delays are ignored), labelled with the airline name and sorted
# from the busiest carrier down.
results <- flights |>
  filter(origin == "JFK", dest == "LAX") |>
  group_by(carrier) |>
  summarise(
    num_flights = n(),
    avg_arr_delay = mean(arr_delay, na.rm = TRUE),
    .groups = "drop"
  ) |>
  inner_join(airlines, by = join_by(carrier)) |>
  arrange(desc(num_flights))
results
# A tibble: 3 × 4
  carrier num_flights avg_arr_delay name                  
  <chr>         <int>         <dbl> <chr>                 
1 B6             3584          4.68 JetBlue Airways       
2 AA             3246         -5.74 American Airlines Inc.
3 DL             3215          6.23 Delta Air Lines Inc.  

Combining the airlines and flights datasets

library(dplyr)
# Attach the airline name to every flight by carrier code. A full join also
# keeps rows whose carrier appears in only one of the two tables.
combined_data <- full_join(flights, airlines, by = join_by(carrier))

Plot

library(ggplot2)
# Horizontal bar chart of the number of flights operated by each airline.
#
# Stretch the 12-colour Set3 palette so that every carrier gets its own fill:
# scale_fill_brewer(palette = "Set3") runs out of colours beyond 12 levels and
# emits an "n too large" warning.
carrier_colours <- grDevices::colorRampPalette(
  RColorBrewer::brewer.pal(12, "Set3")
)(n_distinct(combined_data$carrier))

Visual <- combined_data |>
  # The full join can leave airlines without any flight as rows with a missing
  # flight number; those rows must not be counted as flights.
  filter(!is.na(flight)) |>
  ggplot(aes(x = name, fill = carrier)) +
  # geom_bar() counts rows per airline. Mapping `flight` (the flight number)
  # to y with geom_col() would add up flight numbers instead of counting
  # flights.
  geom_bar() +
  coord_flip() +
  labs(
    title = "Number of Flights by Airline from NYC 2023",
    x = "Airline",
    y = "Number of Flights ",
    fill = "Airline code",
    caption = "Source: nycflights23 dataset (flights & airlines tables)"
  ) +
  scale_fill_manual(values = carrier_colours) +
  theme_minimal()
Visual
Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set3 is 12
Returning the palette you asked for with that many colors

# Acknowledgment: ChatGPT suggested the scale_fill_brewer() line.