#open
dat = read.csv("https://raw.githubusercontent.com/delagroove/dataScience/master/flightsinfo.csv", header = TRUE,na.strings=c("","NA"))
## gather the table by city
res <- gather(dat, "los.angeles", "phoenix", "san.diego", "san.francisco", "seattle", key="city", value="count")
## remove Na values on count
res <- drop_na(res,count)
#rename columns
colnames(res)[1] <- "airline"
colnames(res)[2] <- "status"
#fill the empty values for airline with the value from above.
res <- fill(res, "airline")
## get the total count of flights to a city
sums <- group_by(res, city, airline) %>% summarise(sum(count))
#duplicate a row in between to use the sum for the calculation
sums <- sums[rep(seq_len(nrow(sums)), each=2),]
#initialize new column to store ratio
res$ratio = 0
## save the ratio of flights on time/delayed vs total flights to that city.
for(the_city in levels(as.factor(res$city))){
for(the_airline in levels(res$airline)){
res[which(res$city==the_city,res$airline==the_airline),]$ratio <- res[which(res$city==the_city,res$airline==the_airline),]$count/sums[which(sums$city==the_city,sums$airline==the_airline),]$`sum(count)`
}
}
(res)
## airline status city count ratio
## 1 ALASKA on time los.angeles 497 0.88908766
## 2 ALASKA delayed los.angeles 62 0.11091234
## 4 AM WEST on time los.angeles 694 0.85573366
## 5 AM WEST delayed los.angeles 117 0.14426634
## 6 ALASKA on time phoenix 221 0.94849785
## 7 ALASKA delayed phoenix 12 0.05150215
## 9 AM WEST on time phoenix 4840 0.92102759
## 10 AM WEST delayed phoenix 415 0.07897241
## 11 ALASKA on time san.diego 212 0.91379310
## 12 ALASKA delayed san.diego 20 0.08620690
## 14 AM WEST on time san.diego 383 0.85491071
## 15 AM WEST delayed san.diego 65 0.14508929
## 16 ALASKA on time san.francisco 503 0.83140496
## 17 ALASKA delayed san.francisco 102 0.16859504
## 19 AM WEST on time san.francisco 320 0.71269488
## 20 AM WEST delayed san.francisco 129 0.28730512
## 21 ALASKA on time seattle 1841 0.85787512
## 22 ALASKA delayed seattle 305 0.14212488
## 24 AM WEST on time seattle 201 0.76717557
## 25 AM WEST delayed seattle 61 0.23282443
We can assess here that Alaska has more percentage of flights on time than AM WEST, with pronounced differences in the flights to san francisco and seattle. The difference is not significant at all in Phoenix, where AM West operation is quite big.
A recommendation for AM West would be to upgrade operations in San Francisco and Seattle specially where their operations are constantly delayed.