First we create a csv file to represent this chart:
And then load it into R. The goal is to then transform the data from wide to long, and then perform some analysis on the data.
airline_data <- read.csv('airlinedata.csv')
airline_data
## Airline On_time_delayed LosAngeles Phoenix SanDiego SanFrancisco Seattle
## 1 Alaska on time 497 221 212 503 1841
## 2 Alaska delayed 62 12 20 102 305
## 3 AMWest on time 694 4840 383 320 201
## 4 AMWest delayed 117 415 65 129 61
pacman::p_load(dplyr, tidyr, magrittr)
#Transform from a wide format to long by grouping the locations into a 'Destination' column
long_data <- gather(airline_data, 'Destination', 'Num_Flights', 3:7)
long_data
## Airline On_time_delayed Destination Num_Flights
## 1 Alaska on time LosAngeles 497
## 2 Alaska delayed LosAngeles 62
## 3 AMWest on time LosAngeles 694
## 4 AMWest delayed LosAngeles 117
## 5 Alaska on time Phoenix 221
## 6 Alaska delayed Phoenix 12
## 7 AMWest on time Phoenix 4840
## 8 AMWest delayed Phoenix 415
## 9 Alaska on time SanDiego 212
## 10 Alaska delayed SanDiego 20
## 11 AMWest on time SanDiego 383
## 12 AMWest delayed SanDiego 65
## 13 Alaska on time SanFrancisco 503
## 14 Alaska delayed SanFrancisco 102
## 15 AMWest on time SanFrancisco 320
## 16 AMWest delayed SanFrancisco 129
## 17 Alaska on time Seattle 1841
## 18 Alaska delayed Seattle 305
## 19 AMWest on time Seattle 201
## 20 AMWest delayed Seattle 61
Alaska_Delayed <- long_data %>%
select(Num_Flights) %>%
filter(long_data$Airline == 'Alaska', long_data$On_time_delayed == 'delayed')
Alaska_ontime <- long_data %>%
select(Num_Flights) %>%
filter(long_data$Airline == 'Alaska', long_data$On_time_delayed == 'on time')
AMWest_Delayed <- long_data %>%
select(Num_Flights) %>%
filter(long_data$Airline == 'AMWest', long_data$On_time_delayed == 'delayed')
AMWest_ontime <- long_data %>%
select(Num_Flights) %>%
filter(long_data$Airline == 'AMWest', long_data$On_time_delayed == 'on time')
Destination <- distinct(long_data, Destination)
#Re-organize the data so that we can see the number of flights delayed or on time for each airline. Each row represents a different destination. Also added columns of total on time flights, delayed flights, and total flights for each destination.
totals <- data.frame(c(Destination, Alaska_Delayed, Alaska_ontime, AMWest_Delayed, AMWest_ontime))
colnames(totals) <- c('Destination', 'Alaska_Delayed', 'Alaska_ontime', 'AMWest_Delayed', 'AMWest_ontime')
totals %<>%
mutate(On_time = Alaska_ontime + AMWest_ontime) %>%
mutate(Delayed = Alaska_Delayed + AMWest_Delayed) %>%
mutate(Flights = Alaska_Delayed + Alaska_ontime + AMWest_Delayed + AMWest_ontime)
totals
## Destination Alaska_Delayed Alaska_ontime AMWest_Delayed AMWest_ontime
## 1 LosAngeles 62 497 117 694
## 2 Phoenix 12 221 415 4840
## 3 SanDiego 20 212 65 383
## 4 SanFrancisco 102 503 129 320
## 5 Seattle 305 1841 61 201
## On_time Delayed Flights
## 1 1191 179 1370
## 2 5061 427 5488
## 3 595 85 680
## 4 823 231 1054
## 5 2042 366 2408
Alaska_Delayed <- sum(totals$Alaska_Delayed)
Alaska_ontime <- sum(totals$Alaska_ontime)
Alaska_flights <- Alaska_Delayed + Alaska_ontime
AMWest_Delayed <- sum(totals$AMWest_Delayed)
AMWest_ontime <- sum(totals$AMWest_ontime)
AMWest_flights <- AMWest_Delayed + AMWest_ontime
#Delayed Percentages by Airline
Alaska_p <- (Alaska_Delayed / Alaska_flights) * 100
AMWest_p <- (AMWest_Delayed / AMWest_flights) * 100
cat('The percentage of flights delayed for Alaska Airlines is:', Alaska_p, '%\n')
## The percentage of flights delayed for Alaska Airlines is: 13.27152 %
cat('The percentage of flights delayed for AMWest Airlines is:', AMWest_p, '%')
## The percentage of flights delayed for AMWest Airlines is: 10.89273 %
#Delayed Percentages by Destination
LA_p <- (totals[1, 7] / totals[1,8]) * 100
PHO_p <- (totals[2, 7] / totals[2,8]) * 100
SD_p <- (totals[3, 7] / totals[3,8]) * 100
SF_p <- (totals[4, 7] / totals[4,8]) * 100
SEA_p <- (totals[5, 7] / totals[5,8]) * 100
cat('The percentage of flights delayed heading to Los Angeles is:', LA_p, '%\n')
## The percentage of flights delayed heading to Los Angeles is: 13.06569 %
cat('The percentage of flights delayed heading to Phoenix is:', PHO_p, '%\n')
## The percentage of flights delayed heading to Phoenix is: 7.780612 %
cat('The percentage of flights delayed heading to San Diego is:', SD_p, '%\n')
## The percentage of flights delayed heading to San Diego is: 12.5 %
cat('The percentage of flights delayed heading to San Francisco is:', SF_p, '%\n')
## The percentage of flights delayed heading to San Francisco is: 21.91651 %
cat('The percentage of flights delayed heading to Seattle is:', SEA_p, '%')
## The percentage of flights delayed heading to Seattle is: 15.19934 %
From the analysis we found:
The Airline that was more frequently on time was AMWest Airlines.
The Destination that flights were most frequently delayed for was San Francisco.
The Destination that flights were least frequently delayed for was Phoenix.