## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Warning: package 'plotly' was built under R version 4.0.4
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Read csv from my github

#read in data from GitHub
airlines_csv<-read.csv("https://gist.githubusercontent.com/trishitanath334/5d95c2228059b5c4bc90cc6e79786cd6/raw/20bd9121578b1fefbb7ad5a7b8bfb6588c92d3a3/airlines.csv",header=TRUE,sep=",")

#Visualize csv data
airlines_csv
##         X     X.1 Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1  Alaska on time         497     221       212           503    1841
## 2         delayed          62      12        20           102     305
## 3                          NA      NA        NA            NA      NA
## 4 AM West on time         694    4840       383           320     201
## 5         delayed         117     415        65           129      61

Remove empty values and create data frame

airlines_info<-airlines_csv %>%  
  filter(!is.na(Seattle)) %>% 
    rename("Airline"=X,"Arrival Status"=X.1,"Los Angeles"=Los.Angeles,"San Diego"=San.Diego,"San Francisco"=San.Francisco)

# Add the missing airline names 
airlines_info$Airline<-as.character(airlines_info$Airline)
airlines_info$Airline[2]<-c("Alaska")
airlines_info$Airline[4]<-c("AM West")

#Visualize data
airlines_info
##   Airline Arrival Status Los Angeles Phoenix San Diego San Francisco Seattle
## 1  Alaska        on time         497     221       212           503    1841
## 2  Alaska        delayed          62      12        20           102     305
## 3 AM West        on time         694    4840       383           320     201
## 4 AM West        delayed         117     415        65           129      61

City columns into rows

Order data by Airline then by Arrival Status then finally by City

ordered_data<-airlines_info %>% 
  gather(key=City,value="Amount",3:7)

# order data 
ordered_data<-arrange(ordered_data,Airline,desc(`Arrival Status`), City)

#Visualize data
ordered_data
##    Airline Arrival Status          City Amount
## 1   Alaska        on time   Los Angeles    497
## 2   Alaska        on time       Phoenix    221
## 3   Alaska        on time     San Diego    212
## 4   Alaska        on time San Francisco    503
## 5   Alaska        on time       Seattle   1841
## 6   Alaska        delayed   Los Angeles     62
## 7   Alaska        delayed       Phoenix     12
## 8   Alaska        delayed     San Diego     20
## 9   Alaska        delayed San Francisco    102
## 10  Alaska        delayed       Seattle    305
## 11 AM West        on time   Los Angeles    694
## 12 AM West        on time       Phoenix   4840
## 13 AM West        on time     San Diego    383
## 14 AM West        on time San Francisco    320
## 15 AM West        on time       Seattle    201
## 16 AM West        delayed   Los Angeles    117
## 17 AM West        delayed       Phoenix    415
## 18 AM West        delayed     San Diego     65
## 19 AM West        delayed San Francisco    129
## 20 AM West        delayed       Seattle     61

Total flights

ordered_data %>% 
  group_by(Airline) %>% 
  summarize(`Total Flights`=sum(Amount))
## # A tibble: 2 x 2
##   Airline `Total Flights`
## * <chr>             <int>
## 1 Alaska             3775
## 2 AM West            7225

Compute total amount, on-time ratio and delayed ratio of flights

computed_data<-ordered_data %>% 
  spread(`Arrival Status`,Amount) %>% 
  mutate(Total = delayed+`on time`,`On time ratio`=`on time`/Total,`Delayed Ratio`=delayed/Total)

# View data
computed_data
##    Airline          City delayed on time Total On time ratio Delayed Ratio
## 1   Alaska   Los Angeles      62     497   559     0.8890877    0.11091234
## 2   Alaska       Phoenix      12     221   233     0.9484979    0.05150215
## 3   Alaska     San Diego      20     212   232     0.9137931    0.08620690
## 4   Alaska San Francisco     102     503   605     0.8314050    0.16859504
## 5   Alaska       Seattle     305    1841  2146     0.8578751    0.14212488
## 6  AM West   Los Angeles     117     694   811     0.8557337    0.14426634
## 7  AM West       Phoenix     415    4840  5255     0.9210276    0.07897241
## 8  AM West     San Diego      65     383   448     0.8549107    0.14508929
## 9  AM West San Francisco     129     320   449     0.7126949    0.28730512
## 10 AM West       Seattle      61     201   262     0.7671756    0.23282443

Graph of delayed Flights

ggplotly(ggplot(computed_data,aes(x=City,y=`Delayed Ratio`))+geom_bar(aes(fill=Airline),stat="identity",position="dodge")+xlab("City")+ylab("Ratio of Delayed Flights")+ggtitle("Flights Delayed by City"))

The highest ratio of delayed flights are from AM West Airlines; with the leading route being AM West to San Francisco flights which are delayed approximately 28.73% of the time. The lowest ratio of delayed flights are from Alaska Airlines, with flights to Phoenix having an approximate delay of about 5%. In Overall flights to San Francisco have higher delay ratios ## Graph of on-time Flights

ggplotly(ggplot(computed_data,aes(x=City,y=`On time ratio`))+geom_bar(aes(fill=Airline),stat="identity",position="dodge")+xlab("City")+ylab("Ratio of On Time Flights")+ggtitle("On Time Flights by City"))

Flights to Phoenix the highest on time flight ratios with Alaska at 94.84% and AM West at 92.1%.