Domestic Market Share

#====================================
# Worst Airlines for Flight Delays
#====================================
# NOTE: the former `rm(list = ls())` was removed on purpose. Every object this
# script needs is created below, so wiping the caller's workspace is not
# required and silently destroys unrelated work when the file is sourced.
library(tidyverse)
library(hrbrthemes)
# Reference: https://news.zing.vn/jetstar-vietjet-giu-ky-luc-cham-chuyen-trong-quy-dau-nam-2018-post833416.html?fbclid=IwAR3AZ8QDDxcCAT3GYkptDo_z1eN9x470oth5_Pyxm2gf3EWjXqiq6oO9GEc

# Long-format flight counts: for each carrier one "On_Time" row followed by one
# "Delay" row. `type` is repeated explicitly for all four carriers instead of
# relying on data.frame() recycling a length-4 vector to 8 rows.
df_raw <- data.frame(
  carrier = rep(c("Vietjet", "Vietnam Airline", "Jetstar", "Vasco"), each = 2),
  type = rep(c("On_Time", "Delay"), times = 4),
  n_flight = c(24417, 4844, 29321, 3183, 7087, 1898, 3336, 79),
  stringsAsFactors = FALSE
)

# Colors selected:
my_colors <- c("#8C3F4D", "#3E606F")

# Prepare data for visualization: one row per carrier with total flights,
# on-time rate and delay rate, ordered by increasing delay rate. `carrier`
# becomes a factor whose level order follows that ranking so plots keep it.
df_for_plots <- df_raw %>%
  pivot_wider(names_from = type, values_from = n_flight) %>%
  mutate(
    total = On_Time + Delay,
    on_time_rate = On_Time / total,
    delay_rate = 1 - on_time_rate
  ) %>%
  arrange(delay_rate) %>%
  mutate(carrier = factor(carrier, levels = carrier))

# Attach each carrier's delay rate to the long-format rows. The key is given
# explicitly and compared as character on both sides, which avoids the
# implicit-`by` message and any factor/character coercion during the join.
df1 <- left_join(
  df_raw,
  transmute(df_for_plots, carrier = as.character(carrier), delay_rate),
  by = "carrier"
)
# Graph 1: 100%-stacked horizontal bars showing the delay vs. on-time split of
# flights for each carrier, ordered by delay rate.
df1 %>%
  # Append the delay rate to the carrier name, e.g. "Jetstar (21.12%)".
  # The original passed `sep = " "` to mutate() rather than paste(), which
  # silently added a junk column called `sep`; the label is built directly here.
  mutate(carrier = paste0(carrier, " (", round(100 * delay_rate, 2), "%)")) %>%
  arrange(delay_rate) %>%
  # Each carrier appears on two rows (one per type), hence unique() for levels.
  mutate(carrier = factor(carrier, levels = unique(carrier))) %>%
  ggplot(aes(carrier, n_flight, fill = type)) +
  # position = "fill" rescales every bar to proportions summing to 1.
  geom_col(position = "fill", width = 0.7) +
  coord_flip() +
  scale_fill_manual(
    values = my_colors, name = "", labels = c("Delay", "On-Time")
  ) +
  theme_modern_rc(
    plot_title_size = 22, caption_size = 10,
    axis_text_size = 16, axis_title_size = 12
  ) +
  scale_y_continuous(labels = scales::percent) +
  theme(panel.grid.major.y = element_blank()) +
  theme(panel.grid.minor.x = element_blank()) +
  labs(
    title = "The On-Time Performance of Domestic Flights Operated\nby Four Vietnam Air Carriers",
    x = NULL, y = "Percent of On-Time/Delay",
    caption = "Data Source: Civil Aviation Authority of Viet Nam",
    subtitle = "Jetstar and Vietjet are worst airlines by delay flights/cancellations with\n delay/cancellation rates are 21.12% and 16.55% respectively."
  )
# Graph 2: horizontal bars of total flights per carrier, with each carrier's
# share of all flights appended to its label, ordered by total flights.
df_for_plots %>%
  # Label such as "Vietnam Airline (43.83%)". The original passed `sep = " "`
  # to mutate() rather than paste(), which silently added a junk column called
  # `sep`; the label is built directly here.
  mutate(
    carrier = paste0(carrier, " (", round(100 * total / sum(total), 2), "%)")
  ) %>%
  arrange(total) %>%
  # One row per carrier here, so the sorted labels are already unique levels.
  mutate(carrier = factor(carrier, levels = carrier)) %>%
  ggplot(aes(carrier, total)) +
  geom_col(fill = my_colors[2], width = 0.7) +
  # geom_text(aes(label = total), hjust = 1.1, color = "white", size = 6) +
  coord_flip() +
  theme_modern_rc(
    plot_title_size = 22, caption_size = 10,
    axis_text_size = 16, axis_title_size = 12
  ) +
  theme(panel.grid.major.y = element_blank()) +
  theme(panel.grid.minor.x = element_blank()) +
  labs(
    title = "Domestic Market Share of Four Airline Carriers in Vietnam",
    x = NULL, y = "Number of Flights",
    caption = "Data Source: Civil Aviation Authority of Viet Nam",
    subtitle = "In the domestic market, the Vietnam Airlines holds a 43.83% share. Note that Jetstar and\nregional subsidiary VASCO are excluded."
  )
Compare with US Airlines
Xem ra hãng hàng không tốt nhất nước Mĩ - Hawaiian Airlines với tỉ lệ chậm / hủy chuyến thấp nhất (20.18%) cũng chỉ bằng hãng hàng không có tỉ lệ hủy chuyến cao nhất của Việt Nam là Jetstar (21.12%):

# Data for US Airlines:
library(nycflights13)
data("flights")

# Count flights per carrier and departure status. A flight is "Delay" when
# dep_delay > 0, otherwise "On-Time". Rows with a missing dep_delay are dropped
# before classification.
# NOTE(review): missing dep_delay presumably marks cancelled flights, which
# would mean the rates below cover delays only - confirm against the
# nycflights13 documentation.
df <- flights %>%
  filter(!is.na(dep_delay), !is.na(carrier)) %>%
  mutate(dep_delay_status = if_else(dep_delay > 0, "Delay", "On-Time")) %>%
  count(carrier, dep_delay_status)

# Calculate delay rate: one row per carrier, ordered by increasing delay rate,
# with `carrier` as a factor whose levels follow that order.
df_delay <- df %>%
  # values_fill keeps a carrier with no flights in one status at 0 instead of
  # NA, so its totals and rates remain computable.
  pivot_wider(
    names_from = dep_delay_status, values_from = n, values_fill = 0L
  ) %>%
  mutate(
    total = `On-Time` + Delay,
    on_time_rate = `On-Time` / total,
    delay_rate = 1 - on_time_rate
  ) %>%
  arrange(delay_rate) %>%
  mutate(carrier = factor(carrier, levels = carrier))
# Collect US Airline names:
# Scrape every HTML table from the page, stack them, and keep the
# (Airline, IATA code) pairs for rows whose Country is "United States".
library(rvest)
my_link <- "http://listofairlinesintheworld.com/"
airline_names <- html_table(read_html(my_link), fill = TRUE)

all_airlines <- do.call("rbind", airline_names)
us_airlines <- filter(select(all_airlines, 2:6), Country == "United States")
df_abbr <- select(rename(us_airlines, carrier = IATA), Airline, carrier)

# Restrict to carrier codes present in the flight counts and drop the
# "USAir" entry.
df_names <- filter(df_abbr, carrier %in% df$carrier, Airline != "USAir")
# Join two data frames: per-status flight counts (df), airline display names
# (df_names) and per-carrier delay rates (df_delay), keyed on the carrier code.
# Left joins keep exactly the rows of `df`. The df_delay key is converted from
# factor to character so both sides of the join have the same type.
df_for_plot_us <- df %>%
  left_join(df_names, by = "carrier") %>%
  left_join(mutate(df_delay, carrier = as.character(carrier)), by = "carrier") %>%
  # Fall back to the carrier code when no airline name was found, so such
  # carriers are not labelled "NA".
  mutate(Airline = coalesce(Airline, carrier)) %>%
  select(-carrier) %>%
  rename(carrier = Airline, n_flight = n, type = dep_delay_status)

# 100%-stacked horizontal bars showing the delay vs. on-time split of flights
# for each US carrier, ordered by delay rate.
df_for_plot_us %>%
  # Label such as "Hawaiian Airlines (20.18%)". The original passed `sep = " "`
  # to mutate() rather than paste(), which silently added a junk column called
  # `sep`; the label is built directly here.
  mutate(carrier = paste0(carrier, " (", round(100 * delay_rate, 2), "%)")) %>%
  arrange(delay_rate) %>%
  # Each carrier appears on two rows (one per type), hence unique() for levels.
  mutate(carrier = factor(carrier, levels = unique(carrier))) %>%
  ggplot(aes(carrier, n_flight, fill = type)) +
  geom_col(position = "fill", width = 0.7) +
  coord_flip() +
  scale_fill_manual(
    values = my_colors, name = "", labels = c("Delay", "On-Time")
  ) +
  theme_modern_rc(
    plot_title_size = 22, caption_size = 10,
    axis_text_size = 14, axis_title_size = 12
  ) +
  scale_y_continuous(labels = scales::percent) +
  theme(panel.grid.major.y = element_blank()) +
  theme(panel.grid.minor.x = element_blank()) +
  labs(
    title = "The On-Time Performance of Domestic Flights Operated\nfor 16 US Air Carriers",
    x = NULL, y = "Percent of On-Time/Delay",
    caption = "Data Source: Bureau of Transportation Statistics",
    subtitle = "Southwest Airlines and AirTran Airways are worst airlines by delay flights/cancellations\nwith delay/cancellation rates are 54.27% and 51.90% respectively."
  )
