library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Load the raw airline arrival counts. read.csv() already defaults to a header
# row and a comma separator, so only the file path is needed.
airlinesdata <- read.csv("Data607-Week5 Assignment.csv")

# Preview the first rows of the raw table.
head(airlinesdata)
##   Airlines Arrival Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1   ALASKA On time         497     221       212           503    1841
## 2          Delayed          62      12        20           102     305
## 3  AM WEST On time         694    4840       383           320     201
## 4          Delayed         117     415        65           129      61
# Clean the raw airline table:
#   * drop rows that have no Phoenix count,
#   * give the multi-word city columns readable names,
#   * fill every blank Airlines cell with the airline named on the closest
#     row above it.
# Filling downwards (instead of assigning a fixed vector of airline names)
# keeps the labels correct no matter how many airlines or blank rows the
# file contains.
selectdata <- airlinesdata %>%
  filter(!is.na(Phoenix)) %>%
  rename(
    "Los Angeles" = Los.Angeles,
    "San Diego" = San.Diego,
    "San Francisco" = San.Francisco
  ) %>%
  # as.character() covers the case where read.csv() returned a factor; blank
  # strings are turned into NA so that fill() can carry the last airline
  # name down.
  mutate(Airlines = na_if(as.character(Airlines), "")) %>%
  fill(Airlines)

selectdata
##   Airlines Arrival Los Angeles Phoenix San Diego San Francisco Seattle
## 1   ALASKA On time         497     221       212           503    1841
## 2   ALASKA Delayed          62      12        20           102     305
## 3  AM WEST On time         694    4840       383           320     201
## 4  AM WEST Delayed         117     415        65           129      61
# Reshape to long form: one row per airline / arrival status / city, with the
# flight count in Number. The city columns are chosen as "every column except
# the two identifier columns" rather than by position, so the reshape does not
# depend on how many city columns there are or where they sit.
tidydata <- selectdata %>%
  gather(key = Cities, value = Number, -Airlines, -Arrival) %>%
  arrange(Airlines, Arrival)
head(tidydata)
##   Airlines Arrival        Cities Number
## 1   ALASKA Delayed   Los Angeles     62
## 2   ALASKA Delayed       Phoenix     12
## 3   ALASKA Delayed     San Diego     20
## 4   ALASKA Delayed San Francisco    102
## 5   ALASKA Delayed       Seattle    305
## 6   ALASKA On time   Los Angeles    497
# Total number of flights for each airline, split by arrival status.
summarize(
  group_by(tidydata, Airlines, Arrival),
  Total = sum(Number)
)
## # A tibble: 4 x 3
## # Groups:   Airlines [?]
##   Airlines Arrival Total
##   <chr>    <fct>   <int>
## 1 ALASKA   Delayed   501
## 2 ALASKA   On time  3274
## 3 AM WEST  Delayed   787
## 4 AM WEST  On time  6438
# Total number of flights for each airline across all cities and statuses.
summarize(
  group_by(tidydata, Airlines),
  Total = sum(Number)
)
## # A tibble: 2 x 2
##   Airlines Total
##   <chr>    <int>
## 1 ALASKA    3775
## 2 AM WEST   7225
# Interactive scatter plot of the raw number of delayed flights in each city,
# coloured by airline.
ggplotly(
  tidydata %>%
    filter(Arrival == "Delayed") %>%
    ggplot(aes(Cities, Number, color = Airlines)) +
    geom_point(size = 5) +
    ggtitle("Delayed Flights") +
    ylab("Number of Delayed")
)
# Put the Delayed and On time counts side by side for every airline / city
# pair, then derive the total number of flights and the share that was delayed.
spreadata <- mutate(
  spread(tidydata, Arrival, Number),
  Total = Delayed + `On time`,
  prop = Delayed / Total
)
# Interactive bar chart of the proportion of delayed flights in each city,
# with one bar per airline drawn side by side.
ggplotly(
  ggplot(spreadata, aes(x = Cities, y = prop)) +
    geom_bar(
      aes(fill = Airlines),
      stat = "identity",
      position = "dodge",
      colour = "#D55E00"
    ) +
    ylab("Proportion of Delayed") +
    ggtitle("Delayed Flights")
)