knitr::opts_chunk$set(echo = TRUE)
# Clear the console
cat("\014")
# Check if the package is installed. If not, install the package
if(!require('tidyr')) {
install.packages('tidyr')
library(tidyr)
}
## Loading required package: tidyr
# Check if the package is installed. If not, install the package
if(!require('dplyr')) {
install.packages('dplyr')
library(tidyr)
}
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# 1. Create a .CSV file (or optionally, a MySQL database!) that includes all of the information above
flightDataCSV <- read.csv("https://raw.githubusercontent.com/kalyanparthasarathy/DATA607/master/week5-flights-data.csv")
flightDataCSV
## Airlines Status Los.Angles Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA on time 497 221 212 503 1841
## 2 ALASKA delayed 62 12 20 102 305
## 3 AM WEST on time 694 4840 383 320 201
## 4 AM WEST delayed 117 415 65 129 61
# 2. Read the information from your .CSV file into R,and use tidyr and dplyr as needed to tidy and transform your data
# Transform the columns of cities data into rows
tidyData <- gather(flightDataCSV, "City","# of Flights", 3:7)
tidyData
## Airlines Status City # of Flights
## 1 ALASKA on time Los.Angles 497
## 2 ALASKA delayed Los.Angles 62
## 3 AM WEST on time Los.Angles 694
## 4 AM WEST delayed Los.Angles 117
## 5 ALASKA on time Phoenix 221
## 6 ALASKA delayed Phoenix 12
## 7 AM WEST on time Phoenix 4840
## 8 AM WEST delayed Phoenix 415
## 9 ALASKA on time San.Diego 212
## 10 ALASKA delayed San.Diego 20
## 11 AM WEST on time San.Diego 383
## 12 AM WEST delayed San.Diego 65
## 13 ALASKA on time San.Francisco 503
## 14 ALASKA delayed San.Francisco 102
## 15 AM WEST on time San.Francisco 320
## 16 AM WEST delayed San.Francisco 129
## 17 ALASKA on time Seattle 1841
## 18 ALASKA delayed Seattle 305
## 19 AM WEST on time Seattle 201
## 20 AM WEST delayed Seattle 61
# Transpose flight status to columns - delayed and on time
tidyDataNew <- spread(tidyData, Status, `# of Flights`)
tidyDataNew
## Airlines City delayed on time
## 1 ALASKA Los.Angles 62 497
## 2 ALASKA Phoenix 12 221
## 3 ALASKA San.Diego 20 212
## 4 ALASKA San.Francisco 102 503
## 5 ALASKA Seattle 305 1841
## 6 AM WEST Los.Angles 117 694
## 7 AM WEST Phoenix 415 4840
## 8 AM WEST San.Diego 65 383
## 9 AM WEST San.Francisco 129 320
## 10 AM WEST Seattle 61 201
# 3. Perform analysis to compare the arrival delays for the two airlines
# Delayed percentage of total flights by city
delayedByCity <- mutate(tidyDataNew, delay_percent=round(delayed/(delayed + `on time`) * 100, 2))
delayedByCity
## Airlines City delayed on time delay_percent
## 1 ALASKA Los.Angles 62 497 11.09
## 2 ALASKA Phoenix 12 221 5.15
## 3 ALASKA San.Diego 20 212 8.62
## 4 ALASKA San.Francisco 102 503 16.86
## 5 ALASKA Seattle 305 1841 14.21
## 6 AM WEST Los.Angles 117 694 14.43
## 7 AM WEST Phoenix 415 4840 7.90
## 8 AM WEST San.Diego 65 383 14.51
## 9 AM WEST San.Francisco 129 320 28.73
## 10 AM WEST Seattle 61 201 23.28
# On time percentage of total flights by city
ontimeByCity <- mutate(tidyDataNew, ontime_percent=round(`on time`/(delayed + `on time`) * 100, 2))
ontimeByCity
## Airlines City delayed on time ontime_percent
## 1 ALASKA Los.Angles 62 497 88.91
## 2 ALASKA Phoenix 12 221 94.85
## 3 ALASKA San.Diego 20 212 91.38
## 4 ALASKA San.Francisco 102 503 83.14
## 5 ALASKA Seattle 305 1841 85.79
## 6 AM WEST Los.Angles 117 694 85.57
## 7 AM WEST Phoenix 415 4840 92.10
## 8 AM WEST San.Diego 65 383 85.49
## 9 AM WEST San.Francisco 129 320 71.27
## 10 AM WEST Seattle 61 201 76.72
# Delayed percentage of total flights by Airlines
(airlinesOverallStats <- group_by(tidyDataNew, Airlines))
## # A tibble: 10 x 4
## # Groups: Airlines [2]
## Airlines City delayed `on time`
## * <fctr> <chr> <int> <int>
## 1 ALASKA Los.Angles 62 497
## 2 ALASKA Phoenix 12 221
## 3 ALASKA San.Diego 20 212
## 4 ALASKA San.Francisco 102 503
## 5 ALASKA Seattle 305 1841
## 6 AM WEST Los.Angles 117 694
## 7 AM WEST Phoenix 415 4840
## 8 AM WEST San.Diego 65 383
## 9 AM WEST San.Francisco 129 320
## 10 AM WEST Seattle 61 201
airlinesOverallStats <- select(airlinesOverallStats, Airlines, delayed, 'on time')
airlinesOverallStatsSummary <- summarise(
airlinesOverallStats
, total_delayed=sum(delayed)
, total_ontime=sum(`on time`)
, delay_percent=round(sum(delayed)/(sum(delayed) + sum(`on time`)) * 100, 2)
, ontime_percent=round(sum(`on time`)/(sum(delayed) + sum(`on time`)) * 100, 2)
)
airlinesOverallStatsSummary
## # A tibble: 2 x 5
## Airlines total_delayed total_ontime delay_percent ontime_percent
## <fctr> <int> <int> <dbl> <dbl>
## 1 ALASKA 501 3274 13.27 86.73
## 2 AM WEST 787 6438 10.89 89.11
Overall, AM West airlines has performed better than ALASKA airlines with respect to on time performance.