load CSV file and Clean data

url <- "https://raw.githubusercontent.com/kglan/MSDS/main/DATA607/WorkingWithTidyData/WorkingwithTidyData.csv"
rawdata <-read.csv(url)
rawdata <- t(rawdata)
rawdata<- data.frame(rawdata)
rownames(rawdata)<- c()
rawdata
##        X1      X2   X3      X4      X5
## 1  ALASKA              AM WEST        
## 2 on time delayed      on time delayed
## 3     497      62 <NA>     694     117
## 4     221      12 <NA>    4840     415
## 5     212      20 <NA>     383      65
## 6     503     102 <NA>     320     129
## 7    1841     305 <NA>     201      61
names(rawdata)
## [1] "X1" "X2" "X3" "X4" "X5"
rownames(rawdata)
## [1] "1" "2" "3" "4" "5" "6" "7"
data <- rawdata%>%
  select(-3,)%>%
  slice(-c(1,2))%>%
  mutate_if(is.character, as.numeric)%>%
  t()%>%
  data.frame()%>%
  mutate("Total" = X1 + X2 +X3 + X4 + X5)%>%
  t()%>%
  data.frame()%>%
  rename("ALASKA on Time" = "X1",
         "ALASKA delayed" = "X2",
         "AM_WEST on Time" = "X4",
         "AM_WEST delayed "= "X5")

row.names(data) <- c("Los Angeles", "Phoenix", "San Diego", "San Francisco", "Seattle", "Total")
data
##               ALASKA on Time ALASKA delayed AM_WEST on Time AM_WEST delayed 
## Los Angeles              497             62             694              117
## Phoenix                  221             12            4840              415
## San Diego                212             20             383               65
## San Francisco            503            102             320              129
## Seattle                 1841            305             201               61
## Total                   3274            501            6438              787

Now that data is clean we will conduct Analysis

When we compare both airlines we see that the ALASKA airline has fewer delays within their flights. It is significant to note that The ALASKA airline has less flights total which shows that with such a magnitude of flights AM_WEST is still close in comparison.

airline<- data%>%
  mutate("ALASKA total_flights"= `ALASKA delayed`+`ALASKA on Time`)%>%
  mutate("AM_WEST total_flights"= `AM_WEST delayed `+ `AM_WEST on Time`)%>%
  mutate("ALASKA total_delays"= `ALASKA delayed`/(`ALASKA on Time`+`ALASKA delayed`))%>%
  mutate("AM_WEST total_delays"= `AM_WEST delayed `/(`AM_WEST on Time`+`AM_WEST delayed `))%>%
  round(digits = 2)

airline
##               ALASKA on Time ALASKA delayed AM_WEST on Time AM_WEST delayed 
## Los Angeles              497             62             694              117
## Phoenix                  221             12            4840              415
## San Diego                212             20             383               65
## San Francisco            503            102             320              129
## Seattle                 1841            305             201               61
## Total                   3274            501            6438              787
##               ALASKA total_flights AM_WEST total_flights ALASKA total_delays
## Los Angeles                    559                   811                0.11
## Phoenix                        233                  5255                0.05
## San Diego                      232                   448                0.09
## San Francisco                  605                   449                0.17
## Seattle                       2146                   262                0.14
## Total                         3775                  7225                0.13
##               AM_WEST total_delays
## Los Angeles                   0.14
## Phoenix                       0.08
## San Diego                     0.15
## San Francisco                 0.29
## Seattle                       0.23
## Total                         0.11

Visualization

library(ggplot2)
airline$City<- c("Los Angeles","Phoenix", "San Diego", "San Francisco", "Seattle", "Total")
ggplot(airline, aes(x=City, y=`ALASKA total_delays`)) +
  geom_bar(position='dodge', stat='identity')

ggplot(airline, aes(x=City, y= `AM_WEST total_delays`)) +
  geom_bar(position=position_dodge(), stat='identity')

Conclusion

When looking at the data in in the bar graphs, we see that AM_WEST has a high delay rate in San Francisco and Seattle which skew its delay rate with these two cities. When looking at ALASKA it shows that the delay rates are relatively similar amongst the cities besides Phoenix and San Diego. This implies that AM_WEST needs to focus on fixing the reasons it has such high delays in San Francisco and Seattle.