NYC flights hw2

Author

Fatimah Niyas

#install.packages("nycflights13")
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
library(alluvial)
library(ggalluvial)
library(RColorBrewer)
# Load the flights table from nycflights13 into the workspace and preview
# its first rows.
data("flights", package = "nycflights13")
flights |>
  head()
# A tibble: 6 × 19
   year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
  <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
1  2013     1     1      517            515         2      830            819
2  2013     1     1      533            529         4      850            830
3  2013     1     1      542            540         2      923            850
4  2013     1     1      544            545        -1     1004           1022
5  2013     1     1      554            600        -6      812            837
6  2013     1     1      554            558        -4      740            728
# ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
#   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
#   hour <dbl>, minute <dbl>, time_hour <dttm>
# `flights` is already in the workspace after data(flights), so the former
# self-assignment (`flights <- flights`) was a no-op and has been dropped.
# Note: summary() on a character column reports only length, class and mode.
summary(flights$origin)
   Length     Class      Mode 
   336776 character character 
# Keep only the rows whose departure delay is recorded (dep_delay is not NA).
flights_nona <- filter(flights, !is.na(dep_delay))

# Restrict to flights whose origin is EWR and keep just the month and origin
# columns.
# NOTE(review): this starts from `flights`, not `flights_nona`, so rows with
# a missing dep_delay are included — confirm that is intended.
ewrflight <- select(
  filter(flights, origin == "EWR"),
  month, origin
)
ewrflight
# A tibble: 120,835 × 2
   month origin
   <int> <chr> 
 1     1 EWR   
 2     1 EWR   
 3     1 EWR   
 4     1 EWR   
 5     1 EWR   
 6     1 EWR   
 7     1 EWR   
 8     1 EWR   
 9     1 EWR   
10     1 EWR   
# ℹ 120,825 more rows
# Count the EWR rows in each month. n() already yields one value per group,
# so wrapping it in mean() changed nothing except coercing the count to
# double; the column is now a plain integer count.
ewravg <- ewrflight |>
  group_by(month) |>
  summarise(ewravg = n())
ewravg
# A tibble: 12 × 2
   month ewravg
   <int>  <dbl>
 1     1   9893
 2     2   9107
 3     3  10420
 4     4  10531
 5     5  10592
 6     6  10175
 7     7  10475
 8     8  10359
 9     9   9550
10    10  10104
11    11   9707
12    12   9922
# Recode the numeric month (1-12) as an ordered factor labelled with the
# English month names. `month.name` is base R's built-in vector
# "January", ..., "December", so the labels need not be typed out by hand.
ewravg$month <- factor(
  ewravg$month,
  levels = 1:12,
  labels = month.name,
  ordered = TRUE
)
# Horizontal bar chart of the per-month EWR values stored in `ewravg`, one
# bar per month, filled from the Brewer "Set3" palette.
# NOTE(review): the plotted values are per-month flight counts, so "Average"
# in the title may be misleading — confirm the wording.
# NOTE(review): the nycflights13 documentation appears to credit the flights
# table to the Bureau of Transportation Statistics rather than the FAA
# aircraft registry — confirm the caption.
avgflights <-
  ggplot(ewravg, aes(x = month, y = ewravg, fill = month)) +
  # geom_col() uses the y values as bar lengths; it is the idiomatic
  # equivalent of geom_bar(stat = "identity").
  geom_col() +
  theme_minimal() +
  scale_fill_brewer(palette = "Set3") +
  # Flip the axes so the month labels run down the side of the plot.
  coord_flip() +
  labs(
    title = "EWR Average Monthly Flights",
    x = "Months",
    y = "Number of Flights",
    caption = "FAA Aircraft registry"
  )
avgflights

The bar graph that I created shows the number of flights departing from EWR in each month of 2013. I flipped the bars to be horizontal so that the month labels don’t overlap with each other, and used the Set3 palette to give each month its own color. The number of flights departing from EWR is highest from March through August, peaking in May, which is typically the most popular time to travel.