Load required packages

knitr::opts_chunk$set(echo = TRUE)

# Clear the console
cat("\014")

# Check if the package is installed. If not, install the package
if(!require('tidyr')) {
  install.packages('tidyr')
  library(tidyr)
}
## Loading required package: tidyr
# Check if the package is installed. If not, install the package
if(!require('dplyr')) {
  install.packages('dplyr')
  library(tidyr)
}
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Question 1

# 1. Create a .CSV file (or optionally, a MySQL database!) that includes all of the information above

flightDataCSV <- read.csv("https://raw.githubusercontent.com/kalyanparthasarathy/DATA607/master/week5-flights-data.csv")
flightDataCSV
##   Airlines  Status Los.Angles Phoenix San.Diego San.Francisco Seattle
## 1   ALASKA on time        497     221       212           503    1841
## 2   ALASKA delayed         62      12        20           102     305
## 3  AM WEST on time        694    4840       383           320     201
## 4  AM WEST delayed        117     415        65           129      61

Question 2

# 2. Read the information from your .CSV file into R,and use tidyr and dplyr as needed to tidy and transform your data

# Transform the columns of cities data into rows
tidyData <- gather(flightDataCSV, "City","# of Flights", 3:7)
tidyData
##    Airlines  Status          City # of Flights
## 1    ALASKA on time    Los.Angles          497
## 2    ALASKA delayed    Los.Angles           62
## 3   AM WEST on time    Los.Angles          694
## 4   AM WEST delayed    Los.Angles          117
## 5    ALASKA on time       Phoenix          221
## 6    ALASKA delayed       Phoenix           12
## 7   AM WEST on time       Phoenix         4840
## 8   AM WEST delayed       Phoenix          415
## 9    ALASKA on time     San.Diego          212
## 10   ALASKA delayed     San.Diego           20
## 11  AM WEST on time     San.Diego          383
## 12  AM WEST delayed     San.Diego           65
## 13   ALASKA on time San.Francisco          503
## 14   ALASKA delayed San.Francisco          102
## 15  AM WEST on time San.Francisco          320
## 16  AM WEST delayed San.Francisco          129
## 17   ALASKA on time       Seattle         1841
## 18   ALASKA delayed       Seattle          305
## 19  AM WEST on time       Seattle          201
## 20  AM WEST delayed       Seattle           61
# Transpose flight status to columns - delayed and on time
tidyDataNew <- spread(tidyData, Status, `# of Flights`)

tidyDataNew
##    Airlines          City delayed on time
## 1    ALASKA    Los.Angles      62     497
## 2    ALASKA       Phoenix      12     221
## 3    ALASKA     San.Diego      20     212
## 4    ALASKA San.Francisco     102     503
## 5    ALASKA       Seattle     305    1841
## 6   AM WEST    Los.Angles     117     694
## 7   AM WEST       Phoenix     415    4840
## 8   AM WEST     San.Diego      65     383
## 9   AM WEST San.Francisco     129     320
## 10  AM WEST       Seattle      61     201

Question 3

# 3. Perform analysis to compare the arrival delays for the two airlines

# Delayed percentage of total flights by city
delayedByCity <- mutate(tidyDataNew, delay_percent=round(delayed/(delayed + `on time`) * 100, 2))

delayedByCity
##    Airlines          City delayed on time delay_percent
## 1    ALASKA    Los.Angles      62     497         11.09
## 2    ALASKA       Phoenix      12     221          5.15
## 3    ALASKA     San.Diego      20     212          8.62
## 4    ALASKA San.Francisco     102     503         16.86
## 5    ALASKA       Seattle     305    1841         14.21
## 6   AM WEST    Los.Angles     117     694         14.43
## 7   AM WEST       Phoenix     415    4840          7.90
## 8   AM WEST     San.Diego      65     383         14.51
## 9   AM WEST San.Francisco     129     320         28.73
## 10  AM WEST       Seattle      61     201         23.28
# On time percentage of total flights by city
ontimeByCity <- mutate(tidyDataNew, ontime_percent=round(`on time`/(delayed + `on time`) * 100, 2))

ontimeByCity
##    Airlines          City delayed on time ontime_percent
## 1    ALASKA    Los.Angles      62     497          88.91
## 2    ALASKA       Phoenix      12     221          94.85
## 3    ALASKA     San.Diego      20     212          91.38
## 4    ALASKA San.Francisco     102     503          83.14
## 5    ALASKA       Seattle     305    1841          85.79
## 6   AM WEST    Los.Angles     117     694          85.57
## 7   AM WEST       Phoenix     415    4840          92.10
## 8   AM WEST     San.Diego      65     383          85.49
## 9   AM WEST San.Francisco     129     320          71.27
## 10  AM WEST       Seattle      61     201          76.72
# Delayed percentage of total flights by Airlines
(airlinesOverallStats <- group_by(tidyDataNew, Airlines))
## # A tibble: 10 x 4
## # Groups:   Airlines [2]
##    Airlines          City delayed `on time`
##  *   <fctr>         <chr>   <int>     <int>
##  1   ALASKA    Los.Angles      62       497
##  2   ALASKA       Phoenix      12       221
##  3   ALASKA     San.Diego      20       212
##  4   ALASKA San.Francisco     102       503
##  5   ALASKA       Seattle     305      1841
##  6  AM WEST    Los.Angles     117       694
##  7  AM WEST       Phoenix     415      4840
##  8  AM WEST     San.Diego      65       383
##  9  AM WEST San.Francisco     129       320
## 10  AM WEST       Seattle      61       201
airlinesOverallStats <- select(airlinesOverallStats, Airlines, delayed, 'on time')
airlinesOverallStatsSummary <- summarise(
                          airlinesOverallStats
                          , total_delayed=sum(delayed)
                          , total_ontime=sum(`on time`)
                          , delay_percent=round(sum(delayed)/(sum(delayed) + sum(`on time`)) * 100, 2)
                          , ontime_percent=round(sum(`on time`)/(sum(delayed) + sum(`on time`)) * 100, 2)
                        )
airlinesOverallStatsSummary
## # A tibble: 2 x 5
##   Airlines total_delayed total_ontime delay_percent ontime_percent
##     <fctr>         <int>        <int>         <dbl>          <dbl>
## 1   ALASKA           501         3274         13.27          86.73
## 2  AM WEST           787         6438         10.89          89.11

Overall, AM West airlines has performed better than ALASKA airlines with respect to on time performance.