url <- "https://raw.githubusercontent.com/kglan/MSDS/main/DATA607/WorkingWithTidyData/WorkingwithTidyData.csv"
rawdata <-read.csv(url)
rawdata <- t(rawdata)
rawdata<- data.frame(rawdata)
rownames(rawdata)<- c()
rawdata
## X1 X2 X3 X4 X5
## 1 ALASKA AM WEST
## 2 on time delayed on time delayed
## 3 497 62 <NA> 694 117
## 4 221 12 <NA> 4840 415
## 5 212 20 <NA> 383 65
## 6 503 102 <NA> 320 129
## 7 1841 305 <NA> 201 61
names(rawdata)
## [1] "X1" "X2" "X3" "X4" "X5"
rownames(rawdata)
## [1] "1" "2" "3" "4" "5" "6" "7"
data <- rawdata%>%
select(-3,)%>%
slice(-c(1,2))%>%
mutate_if(is.character, as.numeric)%>%
t()%>%
data.frame()%>%
mutate("Total" = X1 + X2 +X3 + X4 + X5)%>%
t()%>%
data.frame()%>%
rename("ALASKA on Time" = "X1",
"ALASKA delayed" = "X2",
"AM_WEST on Time" = "X4",
"AM_WEST delayed "= "X5")
row.names(data) <- c("Los Angeles", "Phoenix", "San Diego", "San Francisco", "Seattle", "Total")
data
## ALASKA on Time ALASKA delayed AM_WEST on Time AM_WEST delayed
## Los Angeles 497 62 694 117
## Phoenix 221 12 4840 415
## San Diego 212 20 383 65
## San Francisco 503 102 320 129
## Seattle 1841 305 201 61
## Total 3274 501 6438 787
When we compare both airlines we see that the ALASKA airline has fewer delays within their flights. It is significant to note that The ALASKA airline has less flights total which shows that with such a magnitude of flights AM_WEST is still close in comparison.
airline<- data%>%
mutate("ALASKA total_flights"= `ALASKA delayed`+`ALASKA on Time`)%>%
mutate("AM_WEST total_flights"= `AM_WEST delayed `+ `AM_WEST on Time`)%>%
mutate("ALASKA total_delays"= `ALASKA delayed`/(`ALASKA on Time`+`ALASKA delayed`))%>%
mutate("AM_WEST total_delays"= `AM_WEST delayed `/(`AM_WEST on Time`+`AM_WEST delayed `))%>%
round(digits = 2)
airline
## ALASKA on Time ALASKA delayed AM_WEST on Time AM_WEST delayed
## Los Angeles 497 62 694 117
## Phoenix 221 12 4840 415
## San Diego 212 20 383 65
## San Francisco 503 102 320 129
## Seattle 1841 305 201 61
## Total 3274 501 6438 787
## ALASKA total_flights AM_WEST total_flights ALASKA total_delays
## Los Angeles 559 811 0.11
## Phoenix 233 5255 0.05
## San Diego 232 448 0.09
## San Francisco 605 449 0.17
## Seattle 2146 262 0.14
## Total 3775 7225 0.13
## AM_WEST total_delays
## Los Angeles 0.14
## Phoenix 0.08
## San Diego 0.15
## San Francisco 0.29
## Seattle 0.23
## Total 0.11
library(ggplot2)
airline$City<- c("Los Angeles","Phoenix", "San Diego", "San Francisco", "Seattle", "Total")
ggplot(airline, aes(x=City, y=`ALASKA total_delays`)) +
geom_bar(position='dodge', stat='identity')
ggplot(airline, aes(x=City, y= `AM_WEST total_delays`)) +
geom_bar(position=position_dodge(), stat='identity')
When looking at the data in in the bar graphs, we see that AM_WEST has a high delay rate in San Francisco and Seattle which skew its delay rate with these two cities. When looking at ALASKA it shows that the delay rates are relatively similar amongst the cities besides Phoenix and San Diego. This implies that AM_WEST needs to focus on fixing the reasons it has such high delays in San Francisco and Seattle.