Assignment – Tidying and Transforming Data
Load required packages
library(tidyr)
library('dplyr')
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
flightdata <- read.csv("https://raw.githubusercontent.com/IvanGrozny88/DATA607_Assgnment4/main/DATA607_Assgnment4.csv")
flightdata
## Airlines Status Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA On Time 497 221 212 503 1841
## 2 ALASKA Delayed 62 12 20 102 305
## 3 AM WEST On Time 694 4840 383 320 201
## 4 AM WEST Delayed 117 415 65 129 61
# transform the columns in to rows data
tidyData <- gather(flightdata, "City", "Flights", 3:7)
tidyData
## Airlines Status City Flights
## 1 ALASKA On Time Los.Angeles 497
## 2 ALASKA Delayed Los.Angeles 62
## 3 AM WEST On Time Los.Angeles 694
## 4 AM WEST Delayed Los.Angeles 117
## 5 ALASKA On Time Phoenix 221
## 6 ALASKA Delayed Phoenix 12
## 7 AM WEST On Time Phoenix 4840
## 8 AM WEST Delayed Phoenix 415
## 9 ALASKA On Time San.Diego 212
## 10 ALASKA Delayed San.Diego 20
## 11 AM WEST On Time San.Diego 383
## 12 AM WEST Delayed San.Diego 65
## 13 ALASKA On Time San.Francisco 503
## 14 ALASKA Delayed San.Francisco 102
## 15 AM WEST On Time San.Francisco 320
## 16 AM WEST Delayed San.Francisco 129
## 17 ALASKA On Time Seattle 1841
## 18 ALASKA Delayed Seattle 305
## 19 AM WEST On Time Seattle 201
## 20 AM WEST Delayed Seattle 61
# Transpose flight status to columns: Delayed and On Time
tidyData2 <- spread(tidyData, Status, 'Flights')
tidyData2
## Airlines City Delayed On Time
## 1 ALASKA Los.Angeles 62 497
## 2 ALASKA Phoenix 12 221
## 3 ALASKA San.Diego 20 212
## 4 ALASKA San.Francisco 102 503
## 5 ALASKA Seattle 305 1841
## 6 AM WEST Los.Angeles 117 694
## 7 AM WEST Phoenix 415 4840
## 8 AM WEST San.Diego 65 383
## 9 AM WEST San.Francisco 129 320
## 10 AM WEST Seattle 61 201
# Delayed percentage of total flights by city
delayedByCity <- mutate(tidyData2, delay_percent=round(Delayed/(Delayed + `On Time`) * 100, 2))
delayedByCity
## Airlines City Delayed On Time delay_percent
## 1 ALASKA Los.Angeles 62 497 11.09
## 2 ALASKA Phoenix 12 221 5.15
## 3 ALASKA San.Diego 20 212 8.62
## 4 ALASKA San.Francisco 102 503 16.86
## 5 ALASKA Seattle 305 1841 14.21
## 6 AM WEST Los.Angeles 117 694 14.43
## 7 AM WEST Phoenix 415 4840 7.90
## 8 AM WEST San.Diego 65 383 14.51
## 9 AM WEST San.Francisco 129 320 28.73
## 10 AM WEST Seattle 61 201 23.28
# On time percentage of total flights by city
ontimeByCity <- mutate(tidyData2, ontime_percent=round(`On Time`/(Delayed + `On Time`) * 100, 2))
ontimeByCity
## Airlines City Delayed On Time ontime_percent
## 1 ALASKA Los.Angeles 62 497 88.91
## 2 ALASKA Phoenix 12 221 94.85
## 3 ALASKA San.Diego 20 212 91.38
## 4 ALASKA San.Francisco 102 503 83.14
## 5 ALASKA Seattle 305 1841 85.79
## 6 AM WEST Los.Angeles 117 694 85.57
## 7 AM WEST Phoenix 415 4840 92.10
## 8 AM WEST San.Diego 65 383 85.49
## 9 AM WEST San.Francisco 129 320 71.27
## 10 AM WEST Seattle 61 201 76.72
# delayed precentage of total flights by Airlines
(airlinesOverallStats <- group_by(tidyData2, Airlines))
## # A tibble: 10 × 4
## # Groups: Airlines [2]
## Airlines City Delayed `On Time`
## <chr> <chr> <int> <int>
## 1 ALASKA Los.Angeles 62 497
## 2 ALASKA Phoenix 12 221
## 3 ALASKA San.Diego 20 212
## 4 ALASKA San.Francisco 102 503
## 5 ALASKA Seattle 305 1841
## 6 AM WEST Los.Angeles 117 694
## 7 AM WEST Phoenix 415 4840
## 8 AM WEST San.Diego 65 383
## 9 AM WEST San.Francisco 129 320
## 10 AM WEST Seattle 61 201
airlinesOverallStats <- select(airlinesOverallStats, Airlines, Delayed, 'On Time')
airlinesOverallStatsSummary <- summarise(airlinesOverallStats, total_delayed=sum(Delayed), total_ontime=sum(`On Time`), delay_percent=round(sum(Delayed)/(sum(Delayed) + sum(`On Time`)) * 100, 2), ontime_percent=round(sum(`On Time`)/(sum(Delayed) + sum(`On Time`)) * 100, 2))
airlinesOverallStatsSummary
## # A tibble: 2 × 5
## Airlines total_delayed total_ontime delay_percent ontime_percent
## <chr> <int> <int> <dbl> <dbl>
## 1 ALASKA 501 3274 13.3 86.7
## 2 AM WEST 787 6438 10.9 89.1
AM WEST airlines has perfomed better than ALASKA airlines with respect to on time performance.