Using “tidyr” and “dplyr” functions

library(stringr)
library(readr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(plotly)

Read the input data:

# Load the airline delay table from the CSV file in the home directory.
airline <- read.csv(file = "~/airlinedelays.csv")
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on '~/
## airlinedelays.csv'
# Rename the auto-named columns `X` and `X.1`, and swap the dots in the
# multi-word city names for spaces.
airlinename <- airline %>%
  rename(
    airline = X,
    arrival = X.1,
    "Los Angeles" = Los.Angeles,
    "San Diego" = San.Diego,
    "San Francisco" = San.Francisco
  )
airlinename
##   airline arrival Los Angeles Phoenix San Diego San Francisco Seattle
## 1  ALASKA on time         497     221       212           503    1841
## 2         delayed          62      12        20           102     305
## 3 AM WEST on time         694    4840       383           320     201
## 4         delayed         117     415        65           129      61

Fill in the blank airline names:

# Carry each airline name down into the blank cells that follow it.
#
# Blank (empty or whitespace-only) entries are first marked as NA and then
# forward-filled with the closest non-missing value above. Unlike a
# "copy the previous row into every even row" loop, this does not depend on
# rows strictly alternating and is safe on a zero-row table. A blank in the
# very first row has nothing above it and therefore stays NA.
airlinename <- airlinename %>%
  mutate(
    # `%in%` never yields NA, so existing NA entries are left untouched here
    # and are simply filled by the next step.
    airline = replace(airline, trimws(as.character(airline)) %in% "", NA)
  ) %>%
  fill(airline, .direction = "down")
airlinename
##   airline arrival Los Angeles Phoenix San Diego San Francisco Seattle
## 1  ALASKA on time         497     221       212           503    1841
## 2  ALASKA delayed          62      12        20           102     305
## 3 AM WEST on time         694    4840       383           320     201
## 4 AM WEST delayed         117     415        65           129      61
# Reshape from wide (one column per airport) to long: one row per
# airline / arrival status / airport, with the cell value stored in `time`.
#
# The identifier columns are excluded by name instead of selecting the
# airport columns by position, so the reshape keeps working if airports are
# added or the column order changes.
# NOTE(review): the values look like flight counts rather than durations;
# the column name `time` is kept because later steps refer to it -- confirm.
airline1 <- airlinename %>%
  gather(key = "airport", value = "time", -airline, -arrival)
airline1
##    airline arrival       airport time
## 1   ALASKA on time   Los Angeles  497
## 2   ALASKA delayed   Los Angeles   62
## 3  AM WEST on time   Los Angeles  694
## 4  AM WEST delayed   Los Angeles  117
## 5   ALASKA on time       Phoenix  221
## 6   ALASKA delayed       Phoenix   12
## 7  AM WEST on time       Phoenix 4840
## 8  AM WEST delayed       Phoenix  415
## 9   ALASKA on time     San Diego  212
## 10  ALASKA delayed     San Diego   20
## 11 AM WEST on time     San Diego  383
## 12 AM WEST delayed     San Diego   65
## 13  ALASKA on time San Francisco  503
## 14  ALASKA delayed San Francisco  102
## 15 AM WEST on time San Francisco  320
## 16 AM WEST delayed San Francisco  129
## 17  ALASKA on time       Seattle 1841
## 18  ALASKA delayed       Seattle  305
## 19 AM WEST on time       Seattle  201
## 20 AM WEST delayed       Seattle   61
# Keep only the rows whose arrival status is "on time".
#
# The condition refers to the column `arrival` directly (data masking)
# rather than to `airline2$arrival`, so it is always evaluated against the
# data being filtered and stays correct if the pipeline input changes or is
# grouped.
airline2 <- airline1
airlineontime <- airline2 %>%
  filter(arrival == "on time")
airlineontime
##    airline arrival       airport time
## 1   ALASKA on time   Los Angeles  497
## 2  AM WEST on time   Los Angeles  694
## 3   ALASKA on time       Phoenix  221
## 4  AM WEST on time       Phoenix 4840
## 5   ALASKA on time     San Diego  212
## 6  AM WEST on time     San Diego  383
## 7   ALASKA on time San Francisco  503
## 8  AM WEST on time San Francisco  320
## 9   ALASKA on time       Seattle 1841
## 10 AM WEST on time       Seattle  201
# Average of the `time` column across all on-time rows.
mean(airlineontime[["time"]])
## [1] 971.2
# Descriptive statistics of `time` for the on-time rows.
#
# Columns are referenced by bare name so each statistic is computed on the
# data flowing through the pipe (and would respect grouping), instead of on
# the full `airlineontime$time` vector from the global environment.
# Missing values are dropped from every statistic; `N` counts all rows.
airlineontime %>%
  summarise(
    Min = min(time, na.rm = TRUE),
    Median = median(time, na.rm = TRUE),
    Mean = mean(time, na.rm = TRUE),
    Var = var(time, na.rm = TRUE),
    SD = sd(time, na.rm = TRUE),
    Max = max(time, na.rm = TRUE),
    N = n()
  )
##   Min Median  Mean     Var      SD  Max  N
## 1 201    440 971.2 2083057 1443.28 4840 10
# Keep only the rows whose arrival status is "delayed".
#
# The condition refers to the column `arrival` directly (data masking)
# rather than to `airline3$arrival`, so it is always evaluated against the
# data being filtered and stays correct if the pipeline input changes or is
# grouped.
airline3 <- airline1
airlinedelayed <- airline3 %>%
  filter(arrival == "delayed")
airlinedelayed
##    airline arrival       airport time
## 1   ALASKA delayed   Los Angeles   62
## 2  AM WEST delayed   Los Angeles  117
## 3   ALASKA delayed       Phoenix   12
## 4  AM WEST delayed       Phoenix  415
## 5   ALASKA delayed     San Diego   20
## 6  AM WEST delayed     San Diego   65
## 7   ALASKA delayed San Francisco  102
## 8  AM WEST delayed San Francisco  129
## 9   ALASKA delayed       Seattle  305
## 10 AM WEST delayed       Seattle   61
# Average of the `time` column across all delayed rows.
mean(airlinedelayed[["time"]])
## [1] 128.8
# Descriptive statistics of `time` for the delayed rows.
#
# Columns are referenced by bare name so each statistic is computed on the
# data flowing through the pipe (and would respect grouping), instead of on
# the full `airlinedelayed$time` vector from the global environment.
# Missing values are dropped from every statistic; `N` counts all rows.
airlinedelayed %>%
  summarise(
    Min = min(time, na.rm = TRUE),
    Median = median(time, na.rm = TRUE),
    Mean = mean(time, na.rm = TRUE),
    Var = var(time, na.rm = TRUE),
    SD = sd(time, na.rm = TRUE),
    Max = max(time, na.rm = TRUE),
    N = n()
  )
##   Min Median  Mean      Var       SD Max  N
## 1  12   83.5 128.8 16935.96 130.1382 415 10
# Interactive bar chart of the delayed rows: one cluster of bars per
# airport, one bar per airline, bar height taken directly from `time`.
ggplotly(
  ggplot(airlinedelayed, aes(x = airport, y = time)) +
    geom_bar(
      aes(fill = airline),
      stat = "identity",   # use the values as-is instead of counting rows
      position = "dodge"   # place the airlines side by side
    ) +
    ylab("Time of Delayed Flights") +
    ggtitle("Delayed Flights by Region")
)

```