The chart below describes arrival delays for two airlines across five destinations.

image

Part 1:

library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.2.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.2
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Alaska <-sqldf(c("Drop table if exists alaska","CREATE table alaska ( ID serial PRIMARY KEY, Destination VARCHAR (25) NOT NULL, On_Time int NOT NULL, Delayed int NOT NULL)",
  "INSERT INTO alaska (ID, Destination, On_Time, Delayed) Values ('1','Los Angeles','497','62'), ('2','Phoenix','221','12'), ('3','San Diego','212','20'), ('4','San Francisco','503','102'), ('5','Seattle','1841','305')", "Select * From alaska"))
## Loading required package: tcltk
Alaska
##   ID   Destination On_Time Delayed
## 1  1   Los Angeles     497      62
## 2  2       Phoenix     221      12
## 3  3     San Diego     212      20
## 4  4 San Francisco     503     102
## 5  5       Seattle    1841     305
AM_West <-sqldf(c("Drop table if exists AM_West","CREATE table AM_West ( ID serial PRIMARY KEY, Destination VARCHAR (25) NOT NULL, On_Time int NOT NULL, Delayed int NOT NULL)",
  "INSERT INTO AM_West (ID, Destination, On_Time, Delayed) Values ('1','Los Angeles','694','117'), ('2','Phoenix','4840','415'), ('3','San Diego','383','65'), ('4','San Francisco','320','129'), ('5','Seattle','201','61')", "Select * From AM_West"))
AM_West
##   ID   Destination On_Time Delayed
## 1  1   Los Angeles     694     117
## 2  2       Phoenix    4840     415
## 3  3     San Diego     383      65
## 4  4 San Francisco     320     129
## 5  5       Seattle     201      61

Up

Alaska1 <- Alaska %>% select(-ID) %>% t()
colnames(Alaska1) <- Alaska1[1,]
Alaska1 <- Alaska1[-1,]
Alaska1
##         Los Angeles Phoenix San Diego San Francisco Seattle
## On_Time " 497"      " 221"  " 212"    " 503"        "1841" 
## Delayed " 62"       " 12"   " 20"     "102"         "305"
AM_West1 <- AM_West %>% select(-ID) %>% t()
colnames(AM_West1) <- AM_West1[1,]
AM_West1 <- AM_West1[-1,]
AM_West1
##         Los Angeles Phoenix San Diego San Francisco Seattle
## On_Time " 694"      "4840"  " 383"    " 320"        " 201" 
## Delayed "117"       "415"   " 65"     "129"         " 61"

Up

Airlines <- c("Alaska", "Alaska", "AM West", "AM West")
Airlines_Table <- rbind(Alaska1, AM_West1)
Airlines_Table <- cbind(Airlines, Airlines_Table)
Airlines_Table
##         Airlines  Los Angeles Phoenix San Diego San Francisco Seattle
## On_Time "Alaska"  " 497"      " 221"  " 212"    " 503"        "1841" 
## Delayed "Alaska"  " 62"       " 12"   " 20"     "102"         "305"  
## On_Time "AM West" " 694"      "4840"  " 383"    " 320"        " 201" 
## Delayed "AM West" "117"       "415"   " 65"     "129"         " 61"
write.csv(Airlines_Table, file = "C:/Users/Nabila/Documents/GitHub/Class-IS607/Week 6 Assignment/Airlines_Table.csv")

Up

Part 2:

library(curl)
airtable <- read.csv(file="https://raw.githubusercontent.com/nabilahossain/Class-IS607/master/Week%206%20Assignment/Airlines_Table.csv", header=TRUE, sep=",")
airtable
##         X Airlines Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1 On_Time   Alaska         497     221       212           503    1841
## 2 Delayed   Alaska          62      12        20           102     305
## 3 On_Time  AM West         694    4840       383           320     201
## 4 Delayed  AM West         117     415        65           129      61

Up

library(stringr)
airtable1 <- airtable %>%  gather("Destination", "Number_of_Flights", 3:7) %>% select(Airlines, Arrival=X, Destination, Number_of_Flights) 
airtable1$Destination <- str_replace_all(airtable1$Destination, pattern = "\\.", replacement = " ")
airtable1
##    Airlines Arrival   Destination Number_of_Flights
## 1    Alaska On_Time   Los Angeles               497
## 2    Alaska Delayed   Los Angeles                62
## 3   AM West On_Time   Los Angeles               694
## 4   AM West Delayed   Los Angeles               117
## 5    Alaska On_Time       Phoenix               221
## 6    Alaska Delayed       Phoenix                12
## 7   AM West On_Time       Phoenix              4840
## 8   AM West Delayed       Phoenix               415
## 9    Alaska On_Time     San Diego               212
## 10   Alaska Delayed     San Diego                20
## 11  AM West On_Time     San Diego               383
## 12  AM West Delayed     San Diego                65
## 13   Alaska On_Time San Francisco               503
## 14   Alaska Delayed San Francisco               102
## 15  AM West On_Time San Francisco               320
## 16  AM West Delayed San Francisco               129
## 17   Alaska On_Time       Seattle              1841
## 18   Alaska Delayed       Seattle               305
## 19  AM West On_Time       Seattle               201
## 20  AM West Delayed       Seattle                61

Up

at2 <- airtable1 %>% group_by(Airlines, Destination) %>% summarise(Total_Flights = sum(Number_of_Flights)) 
at2
## Source: local data frame [10 x 3]
## Groups: Airlines [?]
## 
##    Airlines   Destination Total_Flights
##      (fctr)         (chr)         (int)
## 1    Alaska   Los Angeles           559
## 2    Alaska       Phoenix           233
## 3    Alaska     San Diego           232
## 4    Alaska San Francisco           605
## 5    Alaska       Seattle          2146
## 6   AM West   Los Angeles           811
## 7   AM West       Phoenix          5255
## 8   AM West     San Diego           448
## 9   AM West San Francisco           449
## 10  AM West       Seattle           262
at3 <- mutate(filter(airtable1, Arrival %in% "Delayed"))
at3
##    Airlines Arrival   Destination Number_of_Flights
## 1    Alaska Delayed   Los Angeles                62
## 2   AM West Delayed   Los Angeles               117
## 3    Alaska Delayed       Phoenix                12
## 4   AM West Delayed       Phoenix               415
## 5    Alaska Delayed     San Diego                20
## 6   AM West Delayed     San Diego                65
## 7    Alaska Delayed San Francisco               102
## 8   AM West Delayed San Francisco               129
## 9    Alaska Delayed       Seattle               305
## 10  AM West Delayed       Seattle                61
Airline_Delay <- at3 %>% select(Airlines, Destination,  Delayed_Flights=Number_of_Flights) %>% inner_join(at2, by = c("Destination", "Airlines")) %>% arrange(Airlines) %>% mutate(Percent_Delayed = (Delayed_Flights / Total_Flights)*100)
Airline_Delay$Percent_Delayed <-  as.numeric(format(round(Airline_Delay$Percent_Delayed, 2), nsmall=2))
Airline_Delay
##    Airlines   Destination Delayed_Flights Total_Flights Percent_Delayed
## 1    Alaska   Los Angeles              62           559           11.09
## 2    Alaska       Phoenix              12           233            5.15
## 3    Alaska     San Diego              20           232            8.62
## 4    Alaska San Francisco             102           605           16.86
## 5    Alaska       Seattle             305          2146           14.21
## 6   AM West   Los Angeles             117           811           14.43
## 7   AM West       Phoenix             415          5255            7.90
## 8   AM West     San Diego              65           448           14.51
## 9   AM West San Francisco             129           449           28.73
## 10  AM West       Seattle              61           262           23.28

Up

Part 3:

library(ggplot2)
library(knitr)
Airline_Delay1 <- Airline_Delay
Airline_Delay1$Percent_Delayed <- paste(Airline_Delay1$Percent_Delayed, "%", sep="")
rownames(Airline_Delay1) <- NULL
kable(Airline_Delay1, caption = "Table 1: Alaska and AM West planes' arrival delays across five destinations.", align = "c")
Table 1: Alaska and AM West planes’ arrival delays across five destinations.
Airlines Destination Delayed_Flights Total_Flights Percent_Delayed
Alaska Los Angeles 62 559 11.09%
Alaska Phoenix 12 233 5.15%
Alaska San Diego 20 232 8.62%
Alaska San Francisco 102 605 16.86%
Alaska Seattle 305 2146 14.21%
AM West Los Angeles 117 811 14.43%
AM West Phoenix 415 5255 7.9%
AM West San Diego 65 448 14.51%
AM West San Francisco 129 449 28.73%
AM West Seattle 61 262 23.28%

I compare the two airlines side by side on the graphs below. We see in the bar graph that for each destination, the arrival delays for AM West airlines is more then Alaska airlines. On the line graph we see that Seattle has the highest percentage difference between the two airlines, almost 9% difference. In Seattle 23.28% of the AM West planes have arrival delays, while 14.21% of the Alaska planes have arrival delays.

ggplot(data = Airline_Delay, aes(x = Destination, y = Percent_Delayed, fill = Airlines)) + geom_bar(stat="identity", position="dodge") + ggtitle("Graph 1: The Percentage of Arrival Delays Across Five Destination") + ylab("Percentage of Planes Delayed")

ggplot(data = Airline_Delay, aes(x = Destination, y = Percent_Delayed, group=Airlines, color = Airlines)) +  geom_line() + geom_point() + ggtitle("Graph 2: The Percentage of Arrival Delays Across Five Destination") + ylab("Percentage of Planes Delayed")

Up

Airline_Delay2 <- Airline_Delay %>% group_by(Airlines) %>% summarise(Delayed_Flights = sum(Delayed_Flights), Total_Flights = sum(Total_Flights)) %>% mutate(Percent_Delayed = (Delayed_Flights / Total_Flights)*100)
Airline_Delay2$Percent_Delayed <- as.numeric(format(round(Airline_Delay2$Percent_Delayed, 2), nsmall=2))
rownames(Airline_Delay2) <- NULL
kable(Airline_Delay2, caption = "Table 2: Alaska and AM West planes' arrival delays.", align = "c")
Table 2: Alaska and AM West planes’ arrival delays.
Airlines Delayed_Flights Total_Flights Percent_Delayed
Alaska 501 3775 13.27
AM West 787 7225 10.89
ggplot(data = Airline_Delay2, aes(x = Airlines, y = Percent_Delayed, fill = Airlines)) + geom_bar(stat="identity", position="dodge") + ggtitle("Graph 3: The Percentage of Arrival Delays for the Airlines") + ylab("Percentage of Planes Delayed")

Up

Conclusion: