library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(data.table)
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
##
## The following object is masked from 'package:purrr':
##
## transpose
url <- "https://raw.githubusercontent.com/folushoa/Data-Science/Data-607/Assignment%205/Assignment%205.csv"
airline_data <- fread(url, na.strings = "")
airline_data
## Airline Status Los Angeles Phoenix San Diego San Francisco Seattle
## 1: Alaska On Time 497 221 212 503 1841
## 2: <NA> Delayed 62 12 20 102 305
## 3: AM West On Time 694 4840 383 320 201
## 4: <NA> Delayed 117 415 65 129 61
airline_data <- airline_data %>%
fill(Airline, .direction = "down")
airline_data
## Airline Status Los Angeles Phoenix San Diego San Francisco Seattle
## 1: Alaska On Time 497 221 212 503 1841
## 2: Alaska Delayed 62 12 20 102 305
## 3: AM West On Time 694 4840 383 320 201
## 4: AM West Delayed 117 415 65 129 61
airline_data <- airline_data %>%
pivot_longer(
cols = !c(Airline, Status),
names_to = "City",
values_to = "Count"
)
airline_data
## # A tibble: 20 × 4
## Airline Status City Count
## <chr> <chr> <chr> <int>
## 1 Alaska On Time Los Angeles 497
## 2 Alaska On Time Phoenix 221
## 3 Alaska On Time San Diego 212
## 4 Alaska On Time San Francisco 503
## 5 Alaska On Time Seattle 1841
## 6 Alaska Delayed Los Angeles 62
## 7 Alaska Delayed Phoenix 12
## 8 Alaska Delayed San Diego 20
## 9 Alaska Delayed San Francisco 102
## 10 Alaska Delayed Seattle 305
## 11 AM West On Time Los Angeles 694
## 12 AM West On Time Phoenix 4840
## 13 AM West On Time San Diego 383
## 14 AM West On Time San Francisco 320
## 15 AM West On Time Seattle 201
## 16 AM West Delayed Los Angeles 117
## 17 AM West Delayed Phoenix 415
## 18 AM West Delayed San Diego 65
## 19 AM West Delayed San Francisco 129
## 20 AM West Delayed Seattle 61
Compare arrival delays of the two airlines
airline_data_delay <- airline_data %>%
filter(Status == "Delayed") %>%
group_by(Airline) %>%
summarise(
"Total Count" = sum(Count),
) %>%
mutate(
"Proportion" = signif(`Total Count`/sum(`Total Count`), digits = 4)
)
airline_data_delay
## # A tibble: 2 × 3
## Airline `Total Count` Proportion
## <chr> <int> <dbl>
## 1 AM West 787 0.611
## 2 Alaska 501 0.389
airline_data_delay %>%
ggplot(aes(x = `Airline`, y = `Proportion`)) +
geom_bar(stat = "identity")
Conclusion: From the table and the plot we see that Alaska airline had less delays than AM West, 0.389 to 0.611
Compare arrival delays by city
airline_data_delay <- airline_data %>%
filter(Status == "Delayed") %>%
group_by(Airline) %>%
mutate(
"Proportion" = signif(`Count`/sum(`Count`), digits = 3)
)
airline_data_delay
## # A tibble: 10 × 5
## # Groups: Airline [2]
## Airline Status City Count Proportion
## <chr> <chr> <chr> <int> <dbl>
## 1 Alaska Delayed Los Angeles 62 0.124
## 2 Alaska Delayed Phoenix 12 0.024
## 3 Alaska Delayed San Diego 20 0.0399
## 4 Alaska Delayed San Francisco 102 0.204
## 5 Alaska Delayed Seattle 305 0.609
## 6 AM West Delayed Los Angeles 117 0.149
## 7 AM West Delayed Phoenix 415 0.527
## 8 AM West Delayed San Diego 65 0.0826
## 9 AM West Delayed San Francisco 129 0.164
## 10 AM West Delayed Seattle 61 0.0775
airline_data_delay %>%
ggplot(aes(x = `Airline`, y = `Proportion`)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~City)
Conclusion: From the plots by City, AM West arrives late more times than Alaska airline. So I would conclude that it is better to travel by AM West than Alaska.