library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the wide-format arrivals table (one column per destination city)
# from the working directory and preview its first rows.
airlinesdata <- read.csv(
  file = "Data607-Week5 Assignment.csv",
  header = TRUE,
  sep = ","
)
head(airlinesdata)
## Airlines Arrival Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA On time 497 221 212 503 1841
## 2 Delayed 62 12 20 102 305
## 3 AM WEST On time 694 4840 383 320 201
## 4 Delayed 117 415 65 129 61
# Clean the raw table: drop rows with no Phoenix count, give the city
# columns readable names, and fill in the airline name on the rows where
# the source left it blank.
selectdata <- airlinesdata %>%
  filter(!is.na(Phoenix)) %>%
  rename(
    "Los Angeles" = Los.Angeles,
    "San Diego" = San.Diego,
    "San Francisco" = San.Francisco
  ) %>%
  # Blank airline cells become NA and then inherit the closest non-blank
  # value above them, so the result does not depend on a hard-coded list of
  # airline names or on how many blank cells there are.
  mutate(Airlines = na_if(as.character(Airlines), "")) %>%
  fill(Airlines)
selectdata
## Airlines Arrival Los Angeles Phoenix San Diego San Francisco Seattle
## 1 ALASKA On time 497 221 212 503 1841
## 2 ALASKA Delayed 62 12 20 102 305
## 3 AM WEST On time 694 4840 383 320 201
## 4 AM WEST Delayed 117 415 65 129 61
# Reshape to long format: one row per airline / arrival status / city.
# The city columns are selected by excluding the two identifier columns by
# name rather than by position, so adding or reordering columns upstream
# cannot silently gather the wrong ones.
tidydata <- selectdata %>%
  gather(key = Cities, value = Number, -Airlines, -Arrival) %>%
  arrange(Airlines, Arrival)
head(tidydata)
## Airlines Arrival Cities Number
## 1 ALASKA Delayed Los Angeles 62
## 2 ALASKA Delayed Phoenix 12
## 3 ALASKA Delayed San Diego 20
## 4 ALASKA Delayed San Francisco 102
## 5 ALASKA Delayed Seattle 305
## 6 ALASKA On time Los Angeles 497
# Flight counts summed over all cities for each airline / arrival status.
tidydata %>%
  group_by(Airlines, Arrival) %>%
  summarize(Total = sum(Number))
## # A tibble: 4 x 3
## # Groups: Airlines [?]
## Airlines Arrival Total
## <chr> <fct> <int>
## 1 ALASKA Delayed 501
## 2 ALASKA On time 3274
## 3 AM WEST Delayed 787
## 4 AM WEST On time 6438
# Overall flight count per airline (delayed and on-time combined).
tidydata %>%
  group_by(Airlines) %>%
  summarize(Total = sum(Number))
## # A tibble: 2 x 2
## Airlines Total
## <chr> <int>
## 1 ALASKA 3775
## 2 AM WEST 7225
# Interactive scatter of raw delayed-flight counts per city, one colour per
# airline.
ggplotly(
  ggplot(
    data = filter(tidydata, Arrival == "Delayed"),
    mapping = aes(x = Cities, y = Number, color = Airlines)
  ) +
    geom_point(size = 5) +
    ggtitle("Delayed Flights") +
    ylab("Number of Delayed")
)
# Put the two arrival statuses side by side (one row per airline and city)
# and derive the share of flights that were delayed.
spreadata <- tidydata %>%
  spread(key = Arrival, value = Number) %>%
  mutate(
    Total = Delayed + `On time`,
    prop = Delayed / Total
  )
# Interactive dodged bar chart of the delayed proportion per city, with the
# two airlines drawn next to each other.
ggplotly(
  ggplot(data = spreadata, mapping = aes(x = Cities, y = prop)) +
    geom_bar(
      mapping = aes(fill = Airlines),
      stat = "identity",
      position = "dodge",
      colour = "#D55E00"
    ) +
    ylab("Proportion of Delayed") +
    ggtitle("Delayed Flights")
)