NYC Flights Assignment

Author

Nhi Vu

Load the library and data

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights23)
data(flights)
data(airlines)

Recode the month numbers as month names

# Recode the numeric month column as English month names with a single
# vectorized lookup into base R's built-in `month.name` constant
# ("January", ..., "December"). Assumes month values are in 1-12.
# The is.numeric() guard keeps this step safe to re-run: once the column
# already holds names, indexing `month.name` with it would yield NA.
if (is.numeric(flights$month)) {
  flights$month <- month.name[flights$month]
}

Select only the columns I need, keep the flights that departed late (dep_delay > 0), and count them by carrier

# Number of flights with a positive departure delay, per carrier.
# filter() drops rows whose dep_delay is NA, so only flights with a recorded
# late departure are counted.
delayed_flight <- flights |>
  select(year, month, day, dep_delay, arr_delay, carrier) |>
  filter(dep_delay > 0) |>
  # count() groups by carrier and tallies rows; `name` sets the column name.
  count(carrier, name = "count")

# Preview the first rows of the summary.
head(delayed_flight)
# A tibble: 6 × 2
  carrier count
  <chr>   <int>
1 9E      13583
2 AA      12657
3 AS       2886
4 B6      30942
5 DL      23604
6 F9        737

Join the airlines dataset

# Attach each airline's full name to its delayed-flight count, matching on the
# shared `carrier` code. A left join keeps every row of `delayed_flight`.
datajoin <- left_join(delayed_flight, airlines, by = "carrier")

# Preview the joined table.
head(datajoin)
# A tibble: 6 × 3
  carrier count name                  
  <chr>   <int> <chr>                 
1 9E      13583 Endeavor Air Inc.     
2 AA      12657 American Airlines Inc.
3 AS       2886 Alaska Airlines Inc.  
4 B6      30942 JetBlue Airways       
5 DL      23604 Delta Air Lines Inc.  
6 F9        737 Frontier Airlines Inc.

Now let’s get plotting

# Horizontal bar chart of delayed-flight counts: one bar per carrier code,
# filled by the airline's full name.
bar1 <- ggplot(datajoin, aes(x = carrier, y = count, fill = name)) +
  # geom_col() takes bar lengths directly from `count`; it is the idiomatic
  # form of geom_bar(stat = "identity"). There is one bar per carrier, so no
  # dodging is needed.
  geom_col() +
  labs(fill = "Name of Carrier",
       x = "Name of Carriers",
       y = "Count of Delayed Flights",
       title = "Number of Delayed Flights by Airlines",
       # The flight records come from the Bureau of Transportation
       # Statistics; the FAA registry is the source of the `planes` table.
       caption = "Source: Bureau of Transportation Statistics (nycflights23)") +
  # Flip the axes so the carriers run down the vertical axis.
  coord_flip()

# Render the plot.
bar1