NYC_Flights_2013_JPannala

Author

Janithri Pannala

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
# Bring the `flights` dataset into the session, then display its structure
# (column names, types, and the first few values of each column).
data(flights)
flights |> str()
tibble [336,776 × 19] (S3: tbl_df/tbl/data.frame)
 $ year          : int [1:336776] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
 $ month         : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
 $ day           : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
 $ dep_time      : int [1:336776] 517 533 542 544 554 554 555 557 557 558 ...
 $ sched_dep_time: int [1:336776] 515 529 540 545 600 558 600 600 600 600 ...
 $ dep_delay     : num [1:336776] 2 4 2 -1 -6 -4 -5 -3 -3 -2 ...
 $ arr_time      : int [1:336776] 830 850 923 1004 812 740 913 709 838 753 ...
 $ sched_arr_time: int [1:336776] 819 830 850 1022 837 728 854 723 846 745 ...
 $ arr_delay     : num [1:336776] 11 20 33 -18 -25 12 19 -14 -8 8 ...
 $ carrier       : chr [1:336776] "UA" "UA" "AA" "B6" ...
 $ flight        : int [1:336776] 1545 1714 1141 725 461 1696 507 5708 79 301 ...
 $ tailnum       : chr [1:336776] "N14228" "N24211" "N619AA" "N804JB" ...
 $ origin        : chr [1:336776] "EWR" "LGA" "JFK" "JFK" ...
 $ dest          : chr [1:336776] "IAH" "IAH" "MIA" "BQN" ...
 $ air_time      : num [1:336776] 227 227 160 183 116 150 158 53 140 138 ...
 $ distance      : num [1:336776] 1400 1416 1089 1576 762 ...
 $ hour          : num [1:336776] 5 5 5 5 6 5 6 6 6 6 ...
 $ minute        : num [1:336776] 15 29 40 45 0 58 0 0 0 0 ...
 $ time_hour     : POSIXct[1:336776], format: "2013-01-01 05:00:00" "2013-01-01 05:00:00" ...
# Preview the first rows of the flights table.
flights |> head()
# A tibble: 6 × 19
   year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
  <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
1  2013     1     1      517            515         2      830            819
2  2013     1     1      533            529         4      850            830
3  2013     1     1      542            540         2      923            850
4  2013     1     1      544            545        -1     1004           1022
5  2013     1     1      554            600        -6      812            837
6  2013     1     1      554            558        -4      740            728
# ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
#   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
#   hour <dbl>, minute <dbl>, time_hour <dttm>
# Build a slim working table holding only the columns used for the delay plot.
#
# `month` is converted from its integer code (1-12) to a factor of English
# month names in a single vectorised lookup: base R's built-in `month.name`
# constant holds "January" ... "December" in calendar order, so indexing it by
# the month number yields the label, and reusing it as `levels` keeps the
# months in calendar (rather than alphabetical) order on plot axes.
flights_by_origin <- tibble(
  month = factor(month.name[flights$month], levels = month.name),
  origin = flights$origin,
  arr_delay = flights$arr_delay,
  dest = flights$dest
)
# Dodged bar chart of arrival delay by month (June-September), coloured by
# origin airport. The finished plot object is stored in `bar_graph`; it is
# drawn when `bar_graph` is printed.
bar_graph <- flights_by_origin |>
  # Keep the four plotted months and drop flights with no recorded arrival
  # delay explicitly, instead of letting ggplot2 discard them with a warning.
  filter(
    month %in% c("June", "July", "August", "September"),
    !is.na(arr_delay)
  ) |>
  ggplot() +
  # geom_col() is the idiomatic spelling of geom_bar(stat = "identity").
  # NOTE(review): every flight is drawn as its own bar, so bars sharing a
  # month/origin overlap and the visible height is set by the most extreme
  # delays in each group. Consider summarising per month and origin first;
  # confirm which statistic (mean, max, count of delayed flights) is intended.
  geom_col(
    aes(x = month, y = arr_delay, fill = origin),
    position = "dodge"
  ) +
  # Name the colours by airport so the mapping does not depend on level order.
  scale_fill_manual(
    values = c(EWR = "hotpink", JFK = "lightpink", LGA = "gray")
  ) +
  # The `+` above is required: without it labs() is evaluated as a separate
  # statement and the labels are never attached to the plot.
  # The y axis shows `arr_delay`, so it is labelled as a delay, not a count.
  labs(
    y = "Arrival Delay (minutes)",
    title = "Delay Arrival Flights in the Summer of  2013",
    fill = "origin",
    caption = "U.S Bureau of Transportation of Statistics"
  )
$y
[1] "Number of Flights"

$fill
[1] "origin"

$title
[1] "Delay Arrival Flights in the Summer of  2013"

$caption
[1] "U.S Bureau of Transportation of Statistics"

attr(,"class")
[1] "labels"
# Auto-print the stored ggplot object so the chart is rendered.
bar_graph
Warning: Removed 3435 rows containing missing values (`geom_bar()`).

# Per-column summary statistics (and NA counts) for the full flights table.
flights |> summary()
      year          month             day           dep_time    sched_dep_time
 Min.   :2013   Min.   : 1.000   Min.   : 1.00   Min.   :   1   Min.   : 106  
 1st Qu.:2013   1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.: 907   1st Qu.: 906  
 Median :2013   Median : 7.000   Median :16.00   Median :1401   Median :1359  
 Mean   :2013   Mean   : 6.549   Mean   :15.71   Mean   :1349   Mean   :1344  
 3rd Qu.:2013   3rd Qu.:10.000   3rd Qu.:23.00   3rd Qu.:1744   3rd Qu.:1729  
 Max.   :2013   Max.   :12.000   Max.   :31.00   Max.   :2400   Max.   :2359  
                                                 NA's   :8255                 
   dep_delay          arr_time    sched_arr_time   arr_delay       
 Min.   : -43.00   Min.   :   1   Min.   :   1   Min.   : -86.000  
 1st Qu.:  -5.00   1st Qu.:1104   1st Qu.:1124   1st Qu.: -17.000  
 Median :  -2.00   Median :1535   Median :1556   Median :  -5.000  
 Mean   :  12.64   Mean   :1502   Mean   :1536   Mean   :   6.895  
 3rd Qu.:  11.00   3rd Qu.:1940   3rd Qu.:1945   3rd Qu.:  14.000  
 Max.   :1301.00   Max.   :2400   Max.   :2359   Max.   :1272.000  
 NA's   :8255      NA's   :8713                  NA's   :9430      
   carrier              flight       tailnum             origin         
 Length:336776      Min.   :   1   Length:336776      Length:336776     
 Class :character   1st Qu.: 553   Class :character   Class :character  
 Mode  :character   Median :1496   Mode  :character   Mode  :character  
                    Mean   :1972                                        
                    3rd Qu.:3465                                        
                    Max.   :8500                                        
                                                                        
     dest              air_time        distance         hour      
 Length:336776      Min.   : 20.0   Min.   :  17   Min.   : 1.00  
 Class :character   1st Qu.: 82.0   1st Qu.: 502   1st Qu.: 9.00  
 Mode  :character   Median :129.0   Median : 872   Median :13.00  
                    Mean   :150.7   Mean   :1040   Mean   :13.18  
                    3rd Qu.:192.0   3rd Qu.:1389   3rd Qu.:17.00  
                    Max.   :695.0   Max.   :4983   Max.   :23.00  
                    NA's   :9430                                  
     minute        time_hour                     
 Min.   : 0.00   Min.   :2013-01-01 05:00:00.00  
 1st Qu.: 8.00   1st Qu.:2013-04-04 13:00:00.00  
 Median :29.00   Median :2013-07-03 10:00:00.00  
 Mean   :26.23   Mean   :2013-07-03 05:22:54.64  
 3rd Qu.:44.00   3rd Qu.:2013-10-01 07:00:00.00  
 Max.   :59.00   Max.   :2013-12-31 23:00:00.00  
                                                 
# Five-number summary, mean, and NA count of the arrival-delay column.
flights_by_origin[["arr_delay"]] |> summary()
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
 -86.000  -17.000   -5.000    6.895   14.000 1272.000     9430 

Summary

I created this visualization to show how arrivals were delayed during the months of June through September 2013, a period commonly known as the “summer season”. Each bar shows the data for its assigned month. As the graph shows, June has the highest number of flight delays from JFK, so I do not believe that June is the most opportune time for people to travel from this airport. John F. Kennedy Airport (JFK) seems to have the highest rate of delays during this season; however, during the month of August, LaGuardia Airport (LGA) has a minimal rate of delay. Overall, EWR has the lowest rate of delays of the three airports listed. Each airport has a different rate of delay, which affects the ease of travel for people in that region.