library(nycflights13)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Inspect the structure of `flights`: dimensions, column names, column types,
# and the first few values of each column.
str(flights)
## tibble [336,776 × 19] (S3: tbl_df/tbl/data.frame)
##  $ year          : int [1:336776] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
##  $ month         : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
##  $ day           : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dep_time      : int [1:336776] 517 533 542 544 554 554 555 557 557 558 ...
##  $ sched_dep_time: int [1:336776] 515 529 540 545 600 558 600 600 600 600 ...
##  $ dep_delay     : num [1:336776] 2 4 2 -1 -6 -4 -5 -3 -3 -2 ...
##  $ arr_time      : int [1:336776] 830 850 923 1004 812 740 913 709 838 753 ...
##  $ sched_arr_time: int [1:336776] 819 830 850 1022 837 728 854 723 846 745 ...
##  $ arr_delay     : num [1:336776] 11 20 33 -18 -25 12 19 -14 -8 8 ...
##  $ carrier       : chr [1:336776] "UA" "UA" "AA" "B6" ...
##  $ flight        : int [1:336776] 1545 1714 1141 725 461 1696 507 5708 79 301 ...
##  $ tailnum       : chr [1:336776] "N14228" "N24211" "N619AA" "N804JB" ...
##  $ origin        : chr [1:336776] "EWR" "LGA" "JFK" "JFK" ...
##  $ dest          : chr [1:336776] "IAH" "IAH" "MIA" "BQN" ...
##  $ air_time      : num [1:336776] 227 227 160 183 116 150 158 53 140 138 ...
##  $ distance      : num [1:336776] 1400 1416 1089 1576 762 ...
##  $ hour          : num [1:336776] 5 5 5 5 6 5 6 6 6 6 ...
##  $ minute        : num [1:336776] 15 29 40 45 0 58 0 0 0 0 ...
##  $ time_hour     : POSIXct[1:336776], format: "2013-01-01 05:00:00" "2013-01-01 05:00:00" ...
# Flights to Philadelphia (PHL) in 2013 that have a recorded arrival delay of
# at most 12 minutes. Comma-separated predicates in filter() are combined
# with a logical AND.
PHL_monthly_flights <- flights %>%
  filter(
    !is.na(arr_delay), # drop flights with no recorded arrival delay
    year == 2013,
    dest == "PHL",
    arr_delay <= 12
  )

# Display the filtered result.
PHL_monthly_flights
## # A tibble: 1,076 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      908        915      -7    1004    1033     -29 US     
##  2  2013     1     1     1600       1610     -10    1712    1729     -17 9E     
##  3  2013     1     1     2000       2000       0    2054    2110     -16 9E     
##  4  2013     1     2      602        600       2     646     659     -13 US     
##  5  2013     1     2      743        745      -2     858     857       1 9E     
##  6  2013     1     2     1606       1610      -4    1730    1729       1 9E     
##  7  2013     1     2     2003       2015     -12    2102    2125     -23 9E     
##  8  2013     1     3      556        600      -4     700     659       1 US     
##  9  2013     1     3      949        955      -6    1051    1100      -9 9E     
## 10  2013     1     3     1250       1300     -10    1346    1406     -20 9E     
## # … with 1,066 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
# Bar chart of the arrival delay of each flight in PHL_monthly_flights,
# positioned by month. Bars are filled by month and outlined in orange.
flights_barplot <- PHL_monthly_flights %>%
  ggplot(mapping = aes(x = month, y = arr_delay, fill = month)) +
  # `color` is a fixed setting, so it belongs outside aes(). Inside aes() the
  # string "orange" is treated as a data value: the outline is drawn in the
  # default palette colour and an extra "colour" legend is added.
  # geom_col() is the direct equivalent of geom_bar(stat = "identity").
  geom_col(position = "dodge", color = "orange") +
  # Month is stored as an integer; label every month instead of the default
  # fractional breaks of a continuous axis.
  scale_x_continuous(breaks = 1:12) +
  ggtitle("PHL's Flight Arrival Delays in 2013") +
  # y is arr_delay (a delay value per flight), not a count of delays.
  ylab("Arrival delay (minutes)") +
  labs(fill = "Flight Delays - Months")

# Render the plot.
flights_barplot

In reviewing the dataset, I noticed that Philadelphia International Airport (PHL) had a large number of flights that arrived early or on time during 2013. After filtering the data down to 1,076 rows, there were no real outliers when comparing arrivals from the three NYC airports (JFK, EWR, and LGA). Higher arrival delays appear in months 3, 6, 7, 8, 9, 10, 11, and 12. The use of a second color (orange) helps to illustrate the limited number of flights that arrived late. The second color also helps the reader see the large and short gaps, by month, where flights arrived late, on time, or early.