Domestic Market Share

#====================================
# Worst Airlines for Flight Delays
#====================================
# NOTE: the former `rm(list = ls())` was dropped on purpose. Wiping the
# caller's workspace is a hidden side effect; restart R for a clean run.
library(tidyverse)
library(hrbrthemes)

# Reference: https://news.zing.vn/jetstar-vietjet-giu-ky-luc-cham-chuyen-trong-quy-dau-nam-2018-post833416.html?fbclid=IwAR3AZ8QDDxcCAT3GYkptDo_z1eN9x470oth5_Pyxm2gf3EWjXqiq6oO9GEc

# Flight counts in long format: each carrier contributes an "On_Time" row
# followed by a "Delay" row, and `n_flight` is listed in that same order.
# `type` is repeated to the full length instead of relying on recycling.
df_raw <- data.frame(
  carrier = rep(c("Vietjet", "Vietnam Airline", "Jetstar", "Vasco"), each = 2),
  type = rep(c("On_Time", "Delay"), times = 4),
  n_flight = c(24417, 4844, 29321, 3183, 7087, 1898, 3336, 79),
  stringsAsFactors = FALSE
)

# Colors selected:
my_colors <- c("#8C3F4D", "#3E606F")

# Prepare data for visualization: one row per carrier with totals and rates,
# sorted by delay rate; `carrier` becomes a factor whose levels keep that order.
df_for_plots <- df_raw %>%
  pivot_wider(names_from = type, values_from = n_flight) %>%
  mutate(
    total = On_Time + Delay,
    on_time_rate = On_Time / total,
    delay_rate = 1 - on_time_rate
  ) %>%
  arrange(delay_rate) %>%
  mutate(carrier = factor(carrier, levels = carrier))

# Attach each carrier's delay rate to the long-format counts. The key is
# converted back to character so both sides of the join share one type, and
# `by` is explicit so the join does not depend on guessing common columns.
df1 <- df_raw %>%
  left_join(
    df_for_plots %>%
      transmute(carrier = as.character(carrier), delay_rate),
    by = "carrier"
  )
# Graph 1: 100% stacked horizontal bars of delayed vs. on-time flights per
# carrier. Carriers are ordered by delay rate and each axis label carries the
# carrier's delay rate in parentheses.
df1 %>%
  mutate(
    per = paste0(round(100 * delay_rate, 2), "%"),
    # Previously `sep = " "` sat outside paste() and mutate() turned it into a
    # stray `sep` column; the label is now assembled in a single paste0() call.
    carrier = paste0(carrier, " (", per, ")")
  ) %>%
  arrange(delay_rate) %>%
  mutate(carrier = factor(carrier, levels = unique(carrier))) %>%
  ggplot(aes(carrier, n_flight, fill = type)) +
  geom_col(position = "fill", width = 0.7) +
  coord_flip() +
  # Fill values sort alphabetically ("Delay", "On_Time"), which is the order
  # the display labels below are given in.
  scale_fill_manual(values = my_colors, name = "", labels = c("Delay", "On-Time")) +
  scale_y_continuous(labels = scales::percent) +
  theme_modern_rc(
    plot_title_size = 22, caption_size = 10,
    axis_text_size = 16, axis_title_size = 12
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  labs(
    title = "The On-Time Performance of Domestic Flights Operated\nby Four Vietnam Air Carriers",
    x = NULL, y = "Percent of On-Time/Delay",
    caption = "Data Source: Civil Aviation Authority of Viet Nam",
    subtitle = "Jetstar and Vietjet are worst airlines by delay flights/cancellations with\n delay/cancellation rates are 21.12% and 16.55% respectively."
  )
# Graph 2: horizontal bars of total flights per carrier, ordered by volume,
# with each carrier's share of all flights appended to its axis label.
df_for_plots %>%
  mutate(
    per = paste0(round(100 * total / sum(total), 2), "%"),
    # Previously `sep = " "` sat outside paste() and mutate() turned it into a
    # stray `sep` column; the label is now assembled in a single paste0() call.
    carrier = paste0(carrier, " (", per, ")")
  ) %>%
  arrange(total) %>%
  mutate(carrier = factor(carrier, levels = carrier)) %>%
  ggplot(aes(carrier, total)) +
  geom_col(fill = my_colors[2], width = 0.7) +
  # geom_text(aes(label = total), hjust = 1.1, color = "white", size = 6) +
  coord_flip() +
  theme_modern_rc(
    plot_title_size = 22, caption_size = 10,
    axis_text_size = 16, axis_title_size = 12
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  labs(
    title = "Domestic Market Share of Four Airline Carriers in Vietnam",
    x = NULL, y = "Number of Flights",
    caption = "Data Source: Civil Aviation Authority of Viet Nam",
    subtitle = "In the domestic market, the Vietnam Airlines holds a 43.83% share. Note that Jetstar and\nregional subsidiary VASCO are excluded."
  )
Compare with US Airlines
Xem ra hãng hàng không tốt nhất nước Mĩ - Hawaiian Airlines với tỉ lệ chậm / hủy chuyến thấp nhất (20.18%) cũng chỉ bằng hãng hàng không có tỉ lệ chậm / hủy chuyến cao nhất của Việt Nam là Jetstar (21.12%):

# Data for US Airlines:
library(nycflights13)
data("flights")

# Flights per carrier and departure status. Rows with a missing `dep_delay`
# are dropped first, so every remaining flight is either "Delay"
# (dep_delay > 0) or "On-Time".
df <- flights %>%
  filter(!is.na(dep_delay), !is.na(carrier)) %>%
  mutate(dep_delay_status = if_else(dep_delay > 0, "Delay", "On-Time")) %>%
  count(carrier, dep_delay_status)

# Calculate delay rate: one row per carrier, sorted by delay rate.
# `values_fill = 0L` keeps the rates defined for a carrier that has no flights
# in one of the two statuses. `carrier` stays character so it joins cleanly
# with the character `carrier` column of `df`.
df_delay <- df %>%
  pivot_wider(
    names_from = dep_delay_status,
    values_from = n,
    values_fill = 0L
  ) %>%
  mutate(
    total = `On-Time` + Delay,
    on_time_rate = `On-Time` / total,
    delay_rate = 1 - on_time_rate
  ) %>%
  arrange(delay_rate)
# Collect US Airline names:
library(rvest)

my_link <- "http://listofairlinesintheworld.com/"

# Parse every HTML table found on the page.
airline_names <- html_table(read_html(my_link), fill = TRUE)

# Stack the tables, keep columns 2 to 6 by position, and reduce to the US rows
# as an (Airline, carrier) lookup, where `carrier` is the IATA column.
# NOTE(review): assumes columns 2 to 6 include Airline, IATA and Country;
# confirm against the live page layout.
df_abbr <- do.call("rbind", airline_names) %>%
  select(2:6) %>%
  filter(Country == "United States") %>%
  select(Airline, carrier = IATA)

# Keep only carriers that appear in the flight counts, excluding "USAir".
df_names <- filter(df_abbr, carrier %in% df$carrier, Airline != "USAir")
# Join two data frames: add the airline name and the per-carrier delay rate to
# the long-format counts, then rename to the columns the plot expects.
# - `left_join` keeps exactly the rows of `df`.
# - The `df_delay` key is coerced to character so both join sides agree.
# - A carrier with no scraped name falls back to its IATA code, not "NA".
df_for_plot_us <- df %>%
  left_join(df_names, by = "carrier") %>%
  left_join(
    df_delay %>% mutate(carrier = as.character(carrier)),
    by = "carrier"
  ) %>%
  mutate(Airline = coalesce(Airline, carrier)) %>%
  select(-carrier) %>%
  rename(carrier = Airline, n_flight = n, type = dep_delay_status)

# 100% stacked horizontal bars of delayed vs. on-time departures per US
# carrier, ordered by delay rate, with the rate appended to each label.
df_for_plot_us %>%
  mutate(
    per = paste0(round(100 * delay_rate, 2), "%"),
    # Previously `sep = " "` sat outside paste() and mutate() turned it into a
    # stray `sep` column; the label is now assembled in a single paste0() call.
    carrier = paste0(carrier, " (", per, ")")
  ) %>%
  arrange(delay_rate) %>%
  mutate(carrier = factor(carrier, levels = unique(carrier))) %>%
  ggplot(aes(carrier, n_flight, fill = type)) +
  geom_col(position = "fill", width = 0.7) +
  coord_flip() +
  scale_fill_manual(values = my_colors, name = "", labels = c("Delay", "On-Time")) +
  scale_y_continuous(labels = scales::percent) +
  theme_modern_rc(
    plot_title_size = 22, caption_size = 10,
    axis_text_size = 14, axis_title_size = 12
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  labs(
    title = "The On-Time Performance of Domestic Flights Operated\nfor 16 US Air Carriers",
    x = NULL, y = "Percent of On-Time/Delay",
    caption = "Data Source: Bureau of Transportation Statistics",
    subtitle = "Southwest Airlines and AirTran Airways are worst airlines by delay flights/cancellations\nwith delay/cancellation rates are 54.27% and 51.90% respectively."
  )
---
title: "Worst Airlines by Flight Delays/Cancellations in Vietnam" 
subtitle: "R for Pleasure"
author: "Nguyen Chi Dung"
output:
  html_document: 
    code_download: true
    code_folding: hide
    highlight: zenburn
    # number_sections: yes
    theme: "flatly"
    toc: TRUE
    toc_float: TRUE
---

```{r setup,include=FALSE}
# Document-wide chunk defaults: echo code, suppress warnings and messages,
# and set the figure retina factor to 2.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.retina = 2
)
```

# The On-Time Performance of Domestic Flights


![](C:/Users/Zbook/Desktop/pic/p1.jpg)

# Domestic Market Share

![](C:/Users/Zbook/Desktop/pic/p2.jpg)


```{r, eval=FALSE}

#====================================
#  Worst Airlines for Flight Delays
#====================================

# NOTE: the former `rm(list = ls())` was dropped on purpose. Wiping the
# caller's workspace is a hidden side effect; restart R for a clean run.

library(tidyverse)
library(hrbrthemes)


# Reference: https://news.zing.vn/jetstar-vietjet-giu-ky-luc-cham-chuyen-trong-quy-dau-nam-2018-post833416.html?fbclid=IwAR3AZ8QDDxcCAT3GYkptDo_z1eN9x470oth5_Pyxm2gf3EWjXqiq6oO9GEc


# Flight counts in long format: each carrier contributes an "On_Time" row
# followed by a "Delay" row, and `n_flight` is listed in that same order.
# `type` is repeated to the full length instead of relying on recycling.
df_raw <- data.frame(
  carrier = rep(c("Vietjet", "Vietnam Airline", "Jetstar", "Vasco"), each = 2),
  type = rep(c("On_Time", "Delay"), times = 4),
  n_flight = c(24417, 4844, 29321, 3183, 7087, 1898, 3336, 79),
  stringsAsFactors = FALSE
)

# Colors selected:
my_colors <- c("#8C3F4D", "#3E606F")


# Prepare data for visualization: one row per carrier with totals and rates,
# sorted by delay rate; `carrier` becomes a factor whose levels keep that order.
df_for_plots <- df_raw %>%
  pivot_wider(names_from = type, values_from = n_flight) %>%
  mutate(
    total = On_Time + Delay,
    on_time_rate = On_Time / total,
    delay_rate = 1 - on_time_rate
  ) %>%
  arrange(delay_rate) %>%
  mutate(carrier = factor(carrier, levels = carrier))

# Attach each carrier's delay rate to the long-format counts. The key is
# converted back to character so both sides of the join share one type, and
# `by` is explicit so the join does not depend on guessing common columns.
df1 <- df_raw %>%
  left_join(
    df_for_plots %>%
      transmute(carrier = as.character(carrier), delay_rate),
    by = "carrier"
  )




# Graph 1: 100% stacked horizontal bars of delayed vs. on-time flights per
# carrier. Carriers are ordered by delay rate and each axis label carries the
# carrier's delay rate in parentheses.

df1 %>%
  mutate(
    per = paste0(round(100 * delay_rate, 2), "%"),
    # Previously `sep = " "` sat outside paste() and mutate() turned it into a
    # stray `sep` column; the label is now assembled in a single paste0() call.
    carrier = paste0(carrier, " (", per, ")")
  ) %>%
  arrange(delay_rate) %>%
  mutate(carrier = factor(carrier, levels = unique(carrier))) %>%
  ggplot(aes(carrier, n_flight, fill = type)) +
  geom_col(position = "fill", width = 0.7) +
  coord_flip() +
  # Fill values sort alphabetically ("Delay", "On_Time"), which is the order
  # the display labels below are given in.
  scale_fill_manual(values = my_colors, name = "", labels = c("Delay", "On-Time")) +
  scale_y_continuous(labels = scales::percent) +
  theme_modern_rc(
    plot_title_size = 22, caption_size = 10,
    axis_text_size = 16, axis_title_size = 12
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  labs(
    title = "The On-Time Performance of Domestic Flights Operated\nby Four Vietnam Air Carriers",
    x = NULL, y = "Percent of On-Time/Delay",
    caption = "Data Source: Civil Aviation Authority of Viet Nam",
    subtitle = "Jetstar and Vietjet are worst airlines by delay flights/cancellations with\n delay/cancellation rates are 21.12% and 16.55% respectively."
  )


# Graph 2: horizontal bars of total flights per carrier, ordered by volume,
# with each carrier's share of all flights appended to its axis label.

df_for_plots %>%
  mutate(
    per = paste0(round(100 * total / sum(total), 2), "%"),
    # Previously `sep = " "` sat outside paste() and mutate() turned it into a
    # stray `sep` column; the label is now assembled in a single paste0() call.
    carrier = paste0(carrier, " (", per, ")")
  ) %>%
  arrange(total) %>%
  mutate(carrier = factor(carrier, levels = carrier)) %>%
  ggplot(aes(carrier, total)) +
  geom_col(fill = my_colors[2], width = 0.7) +
  # geom_text(aes(label = total), hjust = 1.1, color = "white", size = 6) +
  coord_flip() +
  theme_modern_rc(
    plot_title_size = 22, caption_size = 10,
    axis_text_size = 16, axis_title_size = 12
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  labs(
    title = "Domestic Market Share of Four Airline Carriers in Vietnam",
    x = NULL, y = "Number of Flights",
    caption = "Data Source: Civil Aviation Authority of Viet Nam",
    subtitle = "In the domestic market, the Vietnam Airlines holds a 43.83% share. Note that Jetstar and\nregional subsidiary VASCO are excluded."
  )


```

# Compare with US Airlines

Xem ra hãng hàng không tốt nhất nước Mĩ - Hawaiian Airlines với tỉ lệ chậm / hủy chuyến thấp nhất (20.18%) cũng chỉ bằng hãng hàng không có tỉ lệ chậm / hủy chuyến cao nhất của Việt Nam là Jetstar (21.12%): 

![](C:/Users/Zbook/Desktop/pic/p3.jpg)

```{r, eval=FALSE}
# Data for US Airlines:
library(nycflights13)
data("flights")


# Flights per carrier and departure status. Rows with a missing `dep_delay`
# are dropped first, so every remaining flight is either "Delay"
# (dep_delay > 0) or "On-Time".
df <- flights %>%
  filter(!is.na(dep_delay), !is.na(carrier)) %>%
  mutate(dep_delay_status = if_else(dep_delay > 0, "Delay", "On-Time")) %>%
  count(carrier, dep_delay_status)

# Calculate delay rate: one row per carrier, sorted by delay rate.
# `values_fill = 0L` keeps the rates defined for a carrier that has no flights
# in one of the two statuses. `carrier` stays character so it joins cleanly
# with the character `carrier` column of `df`.

df_delay <- df %>%
  pivot_wider(
    names_from = dep_delay_status,
    values_from = n,
    values_fill = 0L
  ) %>%
  mutate(
    total = `On-Time` + Delay,
    on_time_rate = `On-Time` / total,
    delay_rate = 1 - on_time_rate
  ) %>%
  arrange(delay_rate)

# Collect US Airline names:

library(rvest)

my_link <- "http://listofairlinesintheworld.com/"

# Parse every HTML table found on the page.
airline_names <- html_table(read_html(my_link), fill = TRUE)

# Stack the tables, keep columns 2 to 6 by position, and reduce to the US rows
# as an (Airline, carrier) lookup, where `carrier` is the IATA column.
# NOTE(review): assumes columns 2 to 6 include Airline, IATA and Country;
# confirm against the live page layout.
df_abbr <- do.call("rbind", airline_names) %>%
  select(2:6) %>%
  filter(Country == "United States") %>%
  select(Airline, carrier = IATA)


# Keep only carriers that appear in the flight counts, excluding "USAir".
df_names <- filter(df_abbr, carrier %in% df$carrier, Airline != "USAir")

# Join two data frames: add the airline name and the per-carrier delay rate to
# the long-format counts, then rename to the columns the plot expects.
# - `left_join` keeps exactly the rows of `df`.
# - The `df_delay` key is coerced to character so both join sides agree.
# - A carrier with no scraped name falls back to its IATA code, not "NA".
df_for_plot_us <- df %>%
  left_join(df_names, by = "carrier") %>%
  left_join(
    df_delay %>% mutate(carrier = as.character(carrier)),
    by = "carrier"
  ) %>%
  mutate(Airline = coalesce(Airline, carrier)) %>%
  select(-carrier) %>%
  rename(carrier = Airline, n_flight = n, type = dep_delay_status)

# 100% stacked horizontal bars of delayed vs. on-time departures per US
# carrier, ordered by delay rate, with the rate appended to each label.
df_for_plot_us %>%
  mutate(
    per = paste0(round(100 * delay_rate, 2), "%"),
    # Previously `sep = " "` sat outside paste() and mutate() turned it into a
    # stray `sep` column; the label is now assembled in a single paste0() call.
    carrier = paste0(carrier, " (", per, ")")
  ) %>%
  arrange(delay_rate) %>%
  mutate(carrier = factor(carrier, levels = unique(carrier))) %>%
  ggplot(aes(carrier, n_flight, fill = type)) +
  geom_col(position = "fill", width = 0.7) +
  coord_flip() +
  scale_fill_manual(values = my_colors, name = "", labels = c("Delay", "On-Time")) +
  scale_y_continuous(labels = scales::percent) +
  theme_modern_rc(
    plot_title_size = 22, caption_size = 10,
    axis_text_size = 14, axis_title_size = 12
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  labs(
    title = "The On-Time Performance of Domestic Flights Operated\nfor 16 US Air Carriers",
    x = NULL, y = "Percent of On-Time/Delay",
    caption = "Data Source: Bureau of Transportation Statistics",
    subtitle = "Southwest Airlines and AirTran Airways are worst airlines by delay flights/cancellations\nwith delay/cancellation rates are 54.27% and 51.90% respectively."
  )

```

