Read the information for airlines from your .CSV file into R, and use tidyr and dplyr as needed to tidy and transform your data. Perform analysis to compare the arrival delays for the two airlines

Reading the data into R:

url_fly <- "https://raw.githubusercontent.com/emrahakin1985/DATA607/master/datasets/fly.csv"
fly <- read.csv(url_fly, stringsAsFactors = F)
fly
##         X     X.1 Los.Angeles Phoenix San.Diego San.Fransisco Seattle
## 1  ALASKA on time         497     221       212           503    1841
## 2         delayed          62      12        20           102     305
## 3                          NA      NA        NA            NA      NA
## 4 AM WEST on time         694    4840       383           320     201
## 5         delayed         117     415        65           129      61


Removing the line, filling the column for Airlines:

colnames(fly)[1:2] <- c("Airlines", "Arrival")
fly2 <- fly[c(1,2,4,5),]
fly2$Airlines <- rep(c("Alaska", "AM WEST"), each = 2 )
fly2
##   Airlines Arrival Los.Angeles Phoenix San.Diego San.Fransisco Seattle
## 1   Alaska on time         497     221       212           503    1841
## 2   Alaska delayed          62      12        20           102     305
## 4  AM WEST on time         694    4840       383           320     201
## 5  AM WEST delayed         117     415        65           129      61


Gathering city columns into rows:

fly3 <- fly2 %>%
  gather(Cities, n, -Airlines, -Arrival)
fly3
##    Airlines Arrival        Cities    n
## 1    Alaska on time   Los.Angeles  497
## 2    Alaska delayed   Los.Angeles   62
## 3   AM WEST on time   Los.Angeles  694
## 4   AM WEST delayed   Los.Angeles  117
## 5    Alaska on time       Phoenix  221
## 6    Alaska delayed       Phoenix   12
## 7   AM WEST on time       Phoenix 4840
## 8   AM WEST delayed       Phoenix  415
## 9    Alaska on time     San.Diego  212
## 10   Alaska delayed     San.Diego   20
## 11  AM WEST on time     San.Diego  383
## 12  AM WEST delayed     San.Diego   65
## 13   Alaska on time San.Fransisco  503
## 14   Alaska delayed San.Fransisco  102
## 15  AM WEST on time San.Fransisco  320
## 16  AM WEST delayed San.Fransisco  129
## 17   Alaska on time       Seattle 1841
## 18   Alaska delayed       Seattle  305
## 19  AM WEST on time       Seattle  201
## 20  AM WEST delayed       Seattle   61


Spreading elements in Arrival column into 2 different columns:

fly4 <- fly3 %>% 
  spread(Arrival, n)
fly4
##    Airlines        Cities delayed on time
## 1    Alaska   Los.Angeles      62     497
## 2    Alaska       Phoenix      12     221
## 3    Alaska     San.Diego      20     212
## 4    Alaska San.Fransisco     102     503
## 5    Alaska       Seattle     305    1841
## 6   AM WEST   Los.Angeles     117     694
## 7   AM WEST       Phoenix     415    4840
## 8   AM WEST     San.Diego      65     383
## 9   AM WEST San.Fransisco     129     320
## 10  AM WEST       Seattle      61     201


Adding Total column to use it for analysis.

colnames(fly4)[4] <- "on_time"
fly5 <- fly4 %>% 
  mutate(Total = delayed + on_time)
fly5
##    Airlines        Cities delayed on_time Total
## 1    Alaska   Los.Angeles      62     497   559
## 2    Alaska       Phoenix      12     221   233
## 3    Alaska     San.Diego      20     212   232
## 4    Alaska San.Fransisco     102     503   605
## 5    Alaska       Seattle     305    1841  2146
## 6   AM WEST   Los.Angeles     117     694   811
## 7   AM WEST       Phoenix     415    4840  5255
## 8   AM WEST     San.Diego      65     383   448
## 9   AM WEST San.Fransisco     129     320   449
## 10  AM WEST       Seattle      61     201   262


Delay percentage for each Airline:

fly5_compare <- fly5 %>% 
  group_by(Airlines) %>% 
  summarise(Delay_Rate = sum(delayed)/ sum(Total))

fly5_compare$Delay_Rate <- str_c(round(fly5_compare$Delay_Rate*100, 1), "%")
fly5_compare
## # A tibble: 2 x 2
##   Airlines Delay_Rate
##      <chr>      <chr>
## 1   Alaska      13.3%
## 2  AM WEST      10.9%


Delay_rate for each airline to the total number of flights.. However number of flights for AM WEST is almost twice as many.

fly5_compare2 <- fly5 %>% 
  group_by(Airlines) %>% 
  summarise(Number_of_Flights = sum(Total), Delay_Rate = sum(delayed)/ sum(fly5$Total))

fly5_compare2$Delay_Rate <- str_c(round(fly5_compare2$Delay_Rate*100, 1), "%")
fly5_compare2
## # A tibble: 2 x 3
##   Airlines Number_of_Flights Delay_Rate
##      <chr>             <int>      <chr>
## 1   Alaska              3775       4.6%
## 2  AM WEST              7225       7.2%


Adding Delay Rate column:

fly6 <- fly5 %>% 
  mutate(Delay_Rate = delayed / Total)

fly6
##    Airlines        Cities delayed on_time Total Delay_Rate
## 1    Alaska   Los.Angeles      62     497   559 0.11091234
## 2    Alaska       Phoenix      12     221   233 0.05150215
## 3    Alaska     San.Diego      20     212   232 0.08620690
## 4    Alaska San.Fransisco     102     503   605 0.16859504
## 5    Alaska       Seattle     305    1841  2146 0.14212488
## 6   AM WEST   Los.Angeles     117     694   811 0.14426634
## 7   AM WEST       Phoenix     415    4840  5255 0.07897241
## 8   AM WEST     San.Diego      65     383   448 0.14508929
## 9   AM WEST San.Fransisco     129     320   449 0.28730512
## 10  AM WEST       Seattle      61     201   262 0.23282443


Spreading Airlines into 2 columnes to get the delay rate for each city.

fly7 <- fly6  %>%
  select(Airlines, Cities, Delay_Rate) %>% 
  spread(Airlines, Delay_Rate) %>% 
  select(Cities, "Delay_Rate_Alaska" = Alaska, "Delay_Rate_AM_WEST" = "AM WEST")
fly7
##          Cities Delay_Rate_Alaska Delay_Rate_AM_WEST
## 1   Los.Angeles        0.11091234         0.14426634
## 2       Phoenix        0.05150215         0.07897241
## 3     San.Diego        0.08620690         0.14508929
## 4 San.Fransisco        0.16859504         0.28730512
## 5       Seattle        0.14212488         0.23282443


Changing the values in delay rate columns into percantages:

fly7[,2:3] = round(fly7[,2:3], 3)
fly7$Delay_Rate_Alaska <- str_c(fly7$Delay_Rate_Alaska*100, "%")
fly7$Delay_Rate_AM_WEST <- str_c(fly7$Delay_Rate_AM_WEST*100, "%")
fly7
##          Cities Delay_Rate_Alaska Delay_Rate_AM_WEST
## 1   Los.Angeles             11.1%              14.4%
## 2       Phoenix              5.2%               7.9%
## 3     San.Diego              8.6%              14.5%
## 4 San.Fransisco             16.9%              28.7%
## 5       Seattle             14.2%              23.3%