R Markdown
Load libraries
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Compare the on-time to delayed ratio of the two airlines
# Load the flight counts from flight.csv and hold them as a tibble.
# read.csv() already splits on commas, so no explicit `sep` is needed, and
# as_tibble() replaces the deprecated tbl_df().
# NOTE(review): `table` shares its name with base::table(); the name is kept
# because the later chunks refer to the data by it.
table <- as_tibble(read.csv("flight.csv"))
# On-time to delayed ratio for each From value.
table %>%
  # Stack columns 3 to 7 into key/value pairs: To holds the former column
  # name and n holds the cell value.
  gather(key = To, value = n, 3:7) %>%
  group_by(From, Status) %>%
  summarise(Total = sum(n)) %>%
  # One column per Status value, filled with that status's total.
  spread(key = Status, value = Total) %>%
  # data.frame() rewrites non-syntactic column names, which is why the next
  # step can refer to `on.time`.
  data.frame() %>%
  # round() with no digits argument yields a whole-number ratio.
  mutate(ratio = round(on.time / delayed))
## From delayed on.time ratio
## 1 Alaska 501 3256 6
## 2 Amwest 787 6438 8
Mean and SD of the delayed and on-time counts for the two airlines
# Mean and standard deviation of n for every From/Status pair, taken over
# the stacked columns 3 to 7.
table %>%
  gather(key = To, value = n, 3:7) %>%
  group_by(From, Status) %>%
  summarise(
    mean = mean(n),
    sd = sd(n)
  )
## # A tibble: 4 x 4
## # Groups: From [?]
## From Status mean sd
## <fctr> <fctr> <dbl> <dbl>
## 1 Alaska delayed 100.2 120.0175
## 2 Alaska on time 651.2 679.1923
## 3 Amwest delayed 157.4 147.1625
## 4 Amwest on time 1287.6 1994.1889
City with the most delayed and the most on-time flights for each airline
# For each From/Status pair, find the largest n and the To it belongs to.
table %>%
  gather(key = To, value = n, 3:7) %>%
  group_by(From, Status) %>%
  summarise(n = max(n)) %>%
  arrange(desc(Status), desc(n)) %>%
  # The summary no longer carries To; join back to the long data on the
  # group keys plus the maximum itself to recover it.
  inner_join(
    gather(table, key = To, value = n, 3:7),
    by = c("From", "Status", "n")
  ) %>%
  select(From, To, Status, n) %>%
  arrange(desc(Status), desc(n))
## # A tibble: 4 x 4
## # Groups: From [2]
## From To Status n
## <fctr> <chr> <fctr> <dbl>
## 1 Amwest Phoenix on time 4840
## 2 Alaska Seattle on time 1841
## 3 Amwest Phoenix delayed 415
## 4 Alaska Seattle delayed 305
On-time difference between the two airlines for each city
# Absolute gap between the Alaska and Amwest "on time" counts for each To,
# smallest gap first.
table %>%
  gather(key = To, value = n, 3:7) %>%
  filter(Status == "on time") %>%
  arrange(From) %>%
  # One column per From value; the steps below expect Alaska and Amwest.
  spread(key = From, value = n) %>%
  mutate(difference = abs(Alaska - Amwest)) %>%
  select(Alaska, Amwest, difference, To) %>%
  arrange(difference)
## # A tibble: 5 x 4
## Alaska Amwest difference To
## <int> <int> <int> <chr>
## 1 212 383 171 San.Diego
## 2 503 320 183 San.Franciso
## 3 479 694 215 Los.Angeles
## 4 1841 201 1640 Seattle
## 5 221 4840 4619 Phoenix