##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'plotly' was built under R version 4.0.4
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#read in data from GitHub
airlines_csv<-read.csv("https://gist.githubusercontent.com/trishitanath334/5d95c2228059b5c4bc90cc6e79786cd6/raw/20bd9121578b1fefbb7ad5a7b8bfb6588c92d3a3/airlines.csv",header=TRUE,sep=",")
#Visualize csv data
airlines_csv
## X X.1 Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1 Alaska on time 497 221 212 503 1841
## 2 delayed 62 12 20 102 305
## 3 NA NA NA NA NA
## 4 AM West on time 694 4840 383 320 201
## 5 delayed 117 415 65 129 61
airlines_info<-airlines_csv %>%
filter(!is.na(Seattle)) %>%
rename("Airline"=X,"Arrival Status"=X.1,"Los Angeles"=Los.Angeles,"San Diego"=San.Diego,"San Francisco"=San.Francisco)
# Add the missing airline names
airlines_info$Airline<-as.character(airlines_info$Airline)
airlines_info$Airline[2]<-c("Alaska")
airlines_info$Airline[4]<-c("AM West")
#Visualize data
airlines_info
## Airline Arrival Status Los Angeles Phoenix San Diego San Francisco Seattle
## 1 Alaska on time 497 221 212 503 1841
## 2 Alaska delayed 62 12 20 102 305
## 3 AM West on time 694 4840 383 320 201
## 4 AM West delayed 117 415 65 129 61
Order data by Airline then by Arrival Status then finally by City
ordered_data<-airlines_info %>%
gather(key=City,value="Amount",3:7)
# order data
ordered_data<-arrange(ordered_data,Airline,desc(`Arrival Status`), City)
#Visualize data
ordered_data
## Airline Arrival Status City Amount
## 1 Alaska on time Los Angeles 497
## 2 Alaska on time Phoenix 221
## 3 Alaska on time San Diego 212
## 4 Alaska on time San Francisco 503
## 5 Alaska on time Seattle 1841
## 6 Alaska delayed Los Angeles 62
## 7 Alaska delayed Phoenix 12
## 8 Alaska delayed San Diego 20
## 9 Alaska delayed San Francisco 102
## 10 Alaska delayed Seattle 305
## 11 AM West on time Los Angeles 694
## 12 AM West on time Phoenix 4840
## 13 AM West on time San Diego 383
## 14 AM West on time San Francisco 320
## 15 AM West on time Seattle 201
## 16 AM West delayed Los Angeles 117
## 17 AM West delayed Phoenix 415
## 18 AM West delayed San Diego 65
## 19 AM West delayed San Francisco 129
## 20 AM West delayed Seattle 61
ordered_data %>%
group_by(Airline) %>%
summarize(`Total Flights`=sum(Amount))
## # A tibble: 2 x 2
## Airline `Total Flights`
## * <chr> <int>
## 1 Alaska 3775
## 2 AM West 7225
computed_data<-ordered_data %>%
spread(`Arrival Status`,Amount) %>%
mutate(Total = delayed+`on time`,`On time ratio`=`on time`/Total,`Delayed Ratio`=delayed/Total)
# View data
computed_data
## Airline City delayed on time Total On time ratio Delayed Ratio
## 1 Alaska Los Angeles 62 497 559 0.8890877 0.11091234
## 2 Alaska Phoenix 12 221 233 0.9484979 0.05150215
## 3 Alaska San Diego 20 212 232 0.9137931 0.08620690
## 4 Alaska San Francisco 102 503 605 0.8314050 0.16859504
## 5 Alaska Seattle 305 1841 2146 0.8578751 0.14212488
## 6 AM West Los Angeles 117 694 811 0.8557337 0.14426634
## 7 AM West Phoenix 415 4840 5255 0.9210276 0.07897241
## 8 AM West San Diego 65 383 448 0.8549107 0.14508929
## 9 AM West San Francisco 129 320 449 0.7126949 0.28730512
## 10 AM West Seattle 61 201 262 0.7671756 0.23282443
ggplotly(ggplot(computed_data,aes(x=City,y=`Delayed Ratio`))+geom_bar(aes(fill=Airline),stat="identity",position="dodge")+xlab("City")+ylab("Ratio of Delayed Flights")+ggtitle("Flights Delayed by City"))
The highest ratio of delayed flights are from AM West Airlines; with the leading route being AM West to San Francisco flights which are delayed approximately 28.73% of the time. The lowest ratio of delayed flights are from Alaska Airlines, with flights to Phoenix having an approximate delay of about 5%. In Overall flights to San Francisco have higher delay ratios ## Graph of on-time Flights
ggplotly(ggplot(computed_data,aes(x=City,y=`On time ratio`))+geom_bar(aes(fill=Airline),stat="identity",position="dodge")+xlab("City")+ylab("Ratio of On Time Flights")+ggtitle("On Time Flights by City"))
Flights to Phoenix the highest on time flight ratios with Alaska at 94.84% and AM West at 92.1%.