NYC Flights Assignment

Author

Jhonathan Urquilla

Loading libraries

library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(nycflights23)

Merge airline names into flights

# Inner join of the flight records with the airline lookup table on the
# shared carrier code, so each flight row also carries the airline's name.
flights_named <- merge(
  x = flights,
  y = airlines,
  by = "carrier",
  all = FALSE
)

Remove rows with missing air_time, distance, or arr_delay

# Keep only rows where air_time, distance and arr_delay are all present.
# Comma-separated predicates in filter() are combined with a logical AND.
flights_clean <- dplyr::filter(
  flights_named,
  !is.na(air_time),
  !is.na(distance),
  !is.na(arr_delay)
)

Count number of flights by airline

# Tally the cleaned flights per airline name.
airline_counts <- table(flights_clean$name)

# Names of the (up to) four airlines with the most flights, busiest first.
# head() returns at most four names, so a data set with fewer than four
# airlines does not pad the vector with NA the way `[1:4]` would.
top_airlines <- head(names(sort(airline_counts, decreasing = TRUE)), 4)

Filter manually for top 4 airlines

# Keep only the flights operated by one of the airlines in top_airlines.
# %in% works for any length of top_airlines and never returns NA, so it
# cannot introduce all-NA rows the way a chain of `==` comparisons can.
flights_top <- flights_clean[flights_clean$name %in% top_airlines, ]

Create a status column (no mutate)

# Label every flight by the sign of its arrival delay:
#   positive -> "Delayed", negative -> "Early", exactly zero -> "On time".
flights_top[["status"]] <- with(
  flights_top,
  ifelse(
    arr_delay > 0,
    "Delayed",
    ifelse(arr_delay < 0, "Early", "On time")
  )
)

Scatter plot with facets

# Scatter plot of air time against distance, one panel per airline,
# with points coloured by arrival status.
ggplot(flights_top, aes(x = distance, y = air_time, color = status)) +
  # Small, semi-transparent points to reduce overplotting.
  geom_point(alpha = 0.4, size = .8) +
  facet_wrap(~ name) +
  # Fixed colour for each status label created in the status column.
  scale_color_manual(
    values = c("Early" = "lightgreen", "On time" = "darkblue", "Delayed" = "orchid"),
    name = "Arrival Status"
  ) +
  labs(
    title = "Flight Distance vs Air Time by Airline and Arrival Status",
    x = "Distance (miles)",
    y = "Air Time (minutes)",
    # nycflights23 holds flights from 2023, so the caption cites that year.
    caption = "Source: nycflights23 (2023 NYC Flight Data)"
  ) +
  theme_minimal(base_size = 12)