Overview

First we create a csv file to represent this chart:

And then load it into R. The goal is to then transform the data from wide to long, and then perform some analysis on the data.

airline_data <- read.csv('airlinedata.csv')
airline_data
##   Airline On_time_delayed LosAngeles Phoenix SanDiego SanFrancisco Seattle
## 1  Alaska         on time        497     221      212          503    1841
## 2  Alaska         delayed         62      12       20          102     305
## 3  AMWest         on time        694    4840      383          320     201
## 4  AMWest         delayed        117     415       65          129      61
pacman::p_load(dplyr, tidyr, magrittr)

#Transform from a wide format to long by grouping the locations into a 'Destination' column
long_data <- gather(airline_data, 'Destination', 'Num_Flights', 3:7)
long_data
##    Airline On_time_delayed  Destination Num_Flights
## 1   Alaska         on time   LosAngeles         497
## 2   Alaska         delayed   LosAngeles          62
## 3   AMWest         on time   LosAngeles         694
## 4   AMWest         delayed   LosAngeles         117
## 5   Alaska         on time      Phoenix         221
## 6   Alaska         delayed      Phoenix          12
## 7   AMWest         on time      Phoenix        4840
## 8   AMWest         delayed      Phoenix         415
## 9   Alaska         on time     SanDiego         212
## 10  Alaska         delayed     SanDiego          20
## 11  AMWest         on time     SanDiego         383
## 12  AMWest         delayed     SanDiego          65
## 13  Alaska         on time SanFrancisco         503
## 14  Alaska         delayed SanFrancisco         102
## 15  AMWest         on time SanFrancisco         320
## 16  AMWest         delayed SanFrancisco         129
## 17  Alaska         on time      Seattle        1841
## 18  Alaska         delayed      Seattle         305
## 19  AMWest         on time      Seattle         201
## 20  AMWest         delayed      Seattle          61
Alaska_Delayed <- long_data %>%
  select(Num_Flights) %>%
  filter(long_data$Airline == 'Alaska', long_data$On_time_delayed == 'delayed')

Alaska_ontime <- long_data %>%
  select(Num_Flights) %>%
  filter(long_data$Airline == 'Alaska', long_data$On_time_delayed == 'on time')

AMWest_Delayed <- long_data %>%
  select(Num_Flights) %>%
  filter(long_data$Airline == 'AMWest', long_data$On_time_delayed == 'delayed')

AMWest_ontime <- long_data %>%
  select(Num_Flights) %>%
  filter(long_data$Airline == 'AMWest', long_data$On_time_delayed == 'on time')

Destination <- distinct(long_data, Destination)
#Re-organize the data so that we can see the number of flights delayed or on time for each airline.  Each row represents a different destination.  Also added columns of total on time flights, delayed flights, and total flights for each destination.


totals <- data.frame(c(Destination, Alaska_Delayed, Alaska_ontime, AMWest_Delayed, AMWest_ontime))
colnames(totals) <- c('Destination', 'Alaska_Delayed', 'Alaska_ontime', 'AMWest_Delayed', 'AMWest_ontime')

totals %<>% 
  mutate(On_time = Alaska_ontime + AMWest_ontime) %>%
  mutate(Delayed = Alaska_Delayed + AMWest_Delayed) %>%
  mutate(Flights = Alaska_Delayed + Alaska_ontime + AMWest_Delayed + AMWest_ontime)

totals
##    Destination Alaska_Delayed Alaska_ontime AMWest_Delayed AMWest_ontime
## 1   LosAngeles             62           497            117           694
## 2      Phoenix             12           221            415          4840
## 3     SanDiego             20           212             65           383
## 4 SanFrancisco            102           503            129           320
## 5      Seattle            305          1841             61           201
##   On_time Delayed Flights
## 1    1191     179    1370
## 2    5061     427    5488
## 3     595      85     680
## 4     823     231    1054
## 5    2042     366    2408
Alaska_Delayed <- sum(totals$Alaska_Delayed)
Alaska_ontime <- sum(totals$Alaska_ontime)
Alaska_flights <- Alaska_Delayed + Alaska_ontime
AMWest_Delayed <- sum(totals$AMWest_Delayed)
AMWest_ontime <- sum(totals$AMWest_ontime)
AMWest_flights <- AMWest_Delayed + AMWest_ontime
#Delayed Percentages by Airline
Alaska_p <- (Alaska_Delayed / Alaska_flights) * 100
AMWest_p <- (AMWest_Delayed / AMWest_flights) * 100

cat('The percentage of flights delayed for Alaska Airlines is:', Alaska_p, '%\n')
## The percentage of flights delayed for Alaska Airlines is: 13.27152 %
cat('The percentage of flights delayed for AMWest Airlines is:', AMWest_p, '%')
## The percentage of flights delayed for AMWest Airlines is: 10.89273 %
#Delayed Percentages by Destination
LA_p <- (totals[1, 7] / totals[1,8]) * 100
PHO_p <- (totals[2, 7] / totals[2,8]) * 100
SD_p <- (totals[3, 7] / totals[3,8]) * 100
SF_p <- (totals[4, 7] / totals[4,8]) * 100
SEA_p <- (totals[5, 7] / totals[5,8]) * 100

cat('The percentage of flights delayed heading to Los Angeles is:', LA_p, '%\n')
## The percentage of flights delayed heading to Los Angeles is: 13.06569 %
cat('The percentage of flights delayed heading to Phoenix is:', PHO_p, '%\n')
## The percentage of flights delayed heading to Phoenix is: 7.780612 %
cat('The percentage of flights delayed heading to San Diego is:', SD_p, '%\n')
## The percentage of flights delayed heading to San Diego is: 12.5 %
cat('The percentage of flights delayed heading to San Francisco is:', SF_p, '%\n')
## The percentage of flights delayed heading to San Francisco is: 21.91651 %
cat('The percentage of flights delayed heading to Seattle is:', SEA_p, '%')
## The percentage of flights delayed heading to Seattle is: 15.19934 %

Conclusion

From the analysis we found:

The Airline that was more frequently on time was AMWest Airlines.

The Destination that flights were most frequently delayed for was San Francisco.

The Destination that flights were least frequently delayed for was Phoenix.