library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.0     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(nycflights13)
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Print a compact overview of the flights table: dimensions, column names,
# column types and the first few values of each column.
str(flights)
## tibble [336,776 × 19] (S3: tbl_df/tbl/data.frame)
##  $ year          : int [1:336776] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
##  $ month         : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
##  $ day           : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dep_time      : int [1:336776] 517 533 542 544 554 554 555 557 557 558 ...
##  $ sched_dep_time: int [1:336776] 515 529 540 545 600 558 600 600 600 600 ...
##  $ dep_delay     : num [1:336776] 2 4 2 -1 -6 -4 -5 -3 -3 -2 ...
##  $ arr_time      : int [1:336776] 830 850 923 1004 812 740 913 709 838 753 ...
##  $ sched_arr_time: int [1:336776] 819 830 850 1022 837 728 854 723 846 745 ...
##  $ arr_delay     : num [1:336776] 11 20 33 -18 -25 12 19 -14 -8 8 ...
##  $ carrier       : chr [1:336776] "UA" "UA" "AA" "B6" ...
##  $ flight        : int [1:336776] 1545 1714 1141 725 461 1696 507 5708 79 301 ...
##  $ tailnum       : chr [1:336776] "N14228" "N24211" "N619AA" "N804JB" ...
##  $ origin        : chr [1:336776] "EWR" "LGA" "JFK" "JFK" ...
##  $ dest          : chr [1:336776] "IAH" "IAH" "MIA" "BQN" ...
##  $ air_time      : num [1:336776] 227 227 160 183 116 150 158 53 140 138 ...
##  $ distance      : num [1:336776] 1400 1416 1089 1576 762 ...
##  $ hour          : num [1:336776] 5 5 5 5 6 5 6 6 6 6 ...
##  $ minute        : num [1:336776] 15 29 40 45 0 58 0 0 0 0 ...
##  $ time_hour     : POSIXct[1:336776], format: "2013-01-01 05:00:00" "2013-01-01 05:00:00" ...
# Keep only complete rows: na.omit() drops every row that contains an NA in
# any column.
flights_nona <- na.omit(flights)
planes_nona <- na.omit(planes)
# Flights that arrived at least 10 minutes late.
# The filter is a per-row condition, so no group_by() is needed; grouping by
# arr_delay would only leave a stray grouping attached to the result.
delay_10plus <- flights_nona %>%
  filter(arr_delay >= 10)
# Flights that arrived late by at least 1 but fewer than 10 minutes.
# The upper bound is exclusive so that a flight delayed by exactly 10 minutes
# belongs only to delay_10plus (arr_delay >= 10) instead of to both sets.
# The filter is a per-row condition, so no group_by() is needed.
delay_10minus <- flights_nona %>%
  filter(arr_delay >= 1, arr_delay < 10)
# Mean arrival delay per carrier, computed over the late flights held in
# delay_10plus.
avg10plus <- summarise(
  group_by(delay_10plus, carrier),
  arr_delay = mean(arr_delay)
)
library(RColorBrewer)
# Bar chart of the per-carrier averages. geom_col() takes the bar heights
# directly from the y values.
ggplot(avg10plus, aes(x = carrier, y = arr_delay, fill = carrier)) +
  geom_col() +
  ggtitle("Average Late Arrival Time by Carrier") +
  theme_minimal()

The “Average Late Arrival Time by Carrier” graph shows which carriers have the longest average arrival delay among flights that arrived at least 10 minutes late. In this graph we can see that carrier “OO”, commercially known as SkyWest Airlines, had the longest average delay in 2013: its late flights were delayed by 85 minutes on average.

# Per month: the number of late flights in delay_10plus and their mean
# arrival delay.
avgmonthdelay <- summarise(
  group_by(delay_10plus, month),
  count = n(),
  avg = mean(arr_delay)
)
library(RColorBrewer)
# Bar chart of the average late arrival per month.
# month is an integer column, so it is drawn on a continuous x scale with one
# break per month number and the month name as that break's label. This ties
# each label to its month number explicitly instead of relying on a discrete
# scale's positions happening to line up with the integer values.
ggplot(avgmonthdelay, aes(x = month, y = avg, fill = month)) +
  geom_col() +
  scale_x_continuous(
    breaks = 1:12,
    labels = c(
      "Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sept", "Oct", "Nov", "Dec"
    )
  ) +
  ggtitle("Average Late Arrival by Month") +
  theme_minimal()

The “Average Late Arrival by Month” graph shows in which months late flights are delayed the longest. In this graph we can see that, on average, June and July suffer the longest delays. This also correlates with the high number of flights in those months, which coincide with the peak summer season.

library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Number of flights per carrier and month, in long format (one row per
# carrier/month pair).
# .groups = "drop" returns an ungrouped result; without it summarise() keeps
# the output grouped by carrier and prints a "grouped output" message.
data <- flights_nona %>%
  group_by(carrier, month) %>%
  summarise(count_by_month = n(), .groups = "drop")
## `summarise()` has grouped output by 'carrier'. You can override using the `.groups` argument.
# Reshape the long counts into a carrier-by-month matrix and draw it as a
# heatmap. Rowv = NA and Colv = NA keep rows and columns in their existing
# order (no dendrograms); scale = "column" scales values within each month.
arr <- acast(data, carrier ~ month, value.var = "count_by_month")
heatmap(
  arr,
  Rowv = NA,
  Colv = NA,
  scale = "column",
  xlab = "Month",
  ylab = "Carrier",
  main = "Number of Flights Per Month"
)

The “Number of Flights Per Month” graph shows that some carriers, such as UA, have the most flights, while carriers such as AS have the fewest. The darker areas indicate a higher number of flights. The peak number of flights occurs at different times of the year depending on the carrier.