0. Load Libraries & Cleaned Data

library(tidyverse)
library(lubridate)
library(scales)
library(plotly)

# Read the cleaned tables produced by aviation_cleaning.Rmd.
# Paths are relative, so the CSVs must sit in the working directory.
flight_cancellations <- read_csv(file = "cleaned_flight_cancellations.csv")
conflict_events      <- read_csv(file = "cleaned_conflict_events.csv")
airline_losses       <- read_csv(file = "cleaned_airline_losses.csv")
airport_disruptions  <- read_csv(file = "cleaned_airport_disruptions.csv")
airspace_closures    <- read_csv(file = "cleaned_airspace_closures.csv")
flight_reroutes      <- read_csv(file = "cleaned_flight_reroutes.csv")
analysis_main        <- read_csv(file = "cleaned_analysis_main.csv")

# A CSV cannot carry factor metadata, so rebuild `severity` as an ordered
# factor running from least to most severe.
conflict_events <- conflict_events |>
  mutate(
    severity = ordered(severity,
                       levels = c("LOW", "MEDIUM", "HIGH", "CRITICAL"))
  )

1. Dataset Overview

# Gather the six source tables under their own names so their dimensions can
# be summarised in a single pass.
datasets <- list(
  flight_cancellations = flight_cancellations,
  conflict_events      = conflict_events,
  airline_losses       = airline_losses,
  airport_disruptions  = airport_disruptions,
  airspace_closures    = airspace_closures,
  flight_reroutes      = flight_reroutes
)

# One row per table (name, row count, column count), largest table first.
datasets |>
  map_dfr(\(tbl) tibble(rows = nrow(tbl), cols = ncol(tbl)), .id = "table") |>
  arrange(desc(rows))
## # A tibble: 6 × 3
##   table                 rows  cols
##   <chr>                <int> <int>
## 1 flight_cancellations    50    10
## 2 flight_reroutes         45    10
## 3 airline_losses          35    10
## 4 airport_disruptions     35    13
## 5 conflict_events         28    10
## 6 airspace_closures       25     9

2. Conflict Timeline

2.1 Events by Day and Severity

# Stacked daily bar chart of conflict events, coloured by severity.
# Rows flagged `is_diplomatic` are removed before counting.
conflict_events |>
  filter(!is_diplomatic) |>
  count(date, severity) |>
  ggplot(mapping = aes(x = date, y = n, fill = severity)) +
  geom_col(position = "stack") +
  scale_x_date(date_breaks = "1 day", date_labels = "%b %d") +
  scale_fill_manual(
    values = c(
      LOW      = "#d4e6f1",
      MEDIUM   = "#f0b429",
      HIGH     = "#e07b39",
      CRITICAL = "#c0392b"
    )
  ) +
  labs(
    title    = "Conflict Events by Day and Severity",
    subtitle = "Feb 28 – Mar 7, 2026 | Diplomatic events excluded",
    x        = NULL,
    y        = "Number of Events",
    fill     = "Severity"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

2.2 Severity Breakdown

# Count of all recorded conflict events per severity level; every bar is
# labelled with its count and its share of the total (rounded to 1%).
conflict_events |>
  count(severity) |>
  mutate(pct = n / sum(n)) |>
  ggplot(mapping = aes(x = severity, y = n, fill = severity)) +
  geom_col(show.legend = FALSE) +
  geom_text(
    aes(label = paste0(n, " (", percent(pct, accuracy = 1), ")")),
    size  = 4,
    vjust = -0.5
  ) +
  scale_fill_manual(
    values = c(
      LOW      = "#d4e6f1",
      MEDIUM   = "#f0b429",
      HIGH     = "#e07b39",
      CRITICAL = "#c0392b"
    )
  ) +
  labs(
    title = "Conflict Event Severity Distribution",
    x     = "Severity",
    y     = "Count"
  ) +
  theme_minimal(base_size = 13)

2.3 Events by Type

# Horizontal stacked bars of conflict events per event type, split by severity.
# Only the six most frequent event types keep their own bar; the rest are
# lumped together by fct_lump_n().
conflict_events %>%
  mutate(event_type = fct_lump_n(event_type, n = 6)) %>%
  count(event_type, severity) %>%
  # After count() each event type has one row per severity. reorder() defaults
  # to FUN = mean, which would rank bars by their average segment size; rank
  # by the sum instead so the order matches the total stacked bar length.
  ggplot(aes(x = reorder(event_type, n, FUN = sum), y = n, fill = severity)) +
  geom_col(position = "stack") +
  coord_flip() +
  scale_fill_manual(values = c(
    "LOW"      = "#d4e6f1",
    "MEDIUM"   = "#f0b429",
    "HIGH"     = "#e07b39",
    "CRITICAL" = "#c0392b"
  )) +
  labs(title = "Conflict Events by Type",
       x = NULL, y = "Count", fill = "Severity") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")


3. Flight Cancellations

3.1 Cancellations Over Time

# Daily number of cancelled flights drawn as a line with point markers and a
# light shaded area underneath. Only dates present in the data are plotted.
flight_cancellations |>
  count(date) |>
  ggplot(mapping = aes(x = date, y = n)) +
  geom_line(linewidth = 1.2, color = "#1a3a5c") +
  geom_point(size = 3, color = "#1a3a5c") +
  geom_area(fill = "#1a3a5c", alpha = 0.15) +
  scale_x_date(date_labels = "%b %d", date_breaks = "1 day") +
  labs(
    title    = "Flight Cancellations Per Day",
    subtitle = "Feb 28 – Mar 7, 2026",
    x        = NULL,
    y        = "Cancelled Flights"
  ) +
  theme_minimal(base_size = 13)

3.2 Cancellations by Country

# Horizontal bars of total cancellations per country, longest bar on top.
# Bar shade deepens with the count and each bar carries its value as a label.
flight_cancellations |>
  count(country, sort = TRUE) |>
  mutate(country = fct_reorder(country, n)) |>
  ggplot(mapping = aes(x = n, y = country, fill = n)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), size = 3.8, hjust = -0.3) +
  scale_fill_gradient(high = "#1a3a5c", low = "#aec6e8") +
  labs(
    title = "Total Flight Cancellations by Country",
    x     = "Cancelled Flights",
    y     = NULL
  ) +
  theme_minimal(base_size = 13)

3.3 Cancellations by Airline

# Horizontal bars of total cancellations per airline, longest bar on top.
# Bar shade deepens with the count and each bar carries its value as a label.
flight_cancellations |>
  count(airline, sort = TRUE) |>
  mutate(airline = fct_reorder(airline, n)) |>
  ggplot(mapping = aes(x = n, y = airline, fill = n)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), size = 3.8, hjust = -0.3) +
  scale_fill_gradient(high = "#1a3a5c", low = "#aec6e8") +
  labs(
    title = "Total Cancellations by Airline",
    x     = "Cancelled Flights",
    y     = NULL
  ) +
  theme_minimal(base_size = 13)

3.4 Cancellation Reason Categories

# Bucket the free-text `cancellation_reason` into broad categories and plot
# how many cancellations fall into each, labelled with count and share.
flight_cancellations %>%
  mutate(reason_category = case_when(
    # case_when() takes the first matching branch, so the more specific
    # "destination" test must come before the generic airspace test;
    # otherwise any "destination airspace closed" text is swallowed by the
    # generic branch and this category is under-counted.
    str_detect(cancellation_reason, regex("destination", ignore_case = TRUE))              ~ "Destination Airspace Closed",
    # \\b keeps "fir" from matching inside unrelated words such as
    # "confirmed" or "first".
    str_detect(cancellation_reason, regex("airspace closed|\\bfirs?\\b", ignore_case = TRUE)) ~ "Airspace / FIR Closure",
    str_detect(cancellation_reason, regex("precautionary", ignore_case = TRUE))            ~ "Precautionary Advisory",
    str_detect(cancellation_reason, regex("military|conflict", ignore_case = TRUE))        ~ "Military / Conflict Zone",
    # Also catches missing reasons: an NA condition never matches a branch.
    TRUE ~ "Other"
  )) %>%
  count(reason_category, sort = TRUE) %>%
  mutate(pct = n / sum(n),
         reason_category = fct_reorder(reason_category, n)) %>%
  ggplot(aes(x = n, y = reason_category, fill = reason_category)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = paste0(n, " (", percent(pct, 1), ")")), hjust = -0.2, size = 3.8) +
  scale_fill_brewer(palette = "Blues", direction = 1) +
  # Leave room to the right of the longest bar so its label is not clipped
  # at the panel edge.
  scale_x_continuous(expand = expansion(mult = c(0, 0.25))) +
  labs(title = "Cancellations by Reason Category",
       x = "Count", y = NULL) +
  theme_minimal(base_size = 13)

3.5 Aircraft Type Distribution

# Group the `aircraft_type` strings into aircraft families by substring match
# and plot how many cancelled flights involved each family. Types that match
# none of the patterns (or are missing) fall into "Other".
flight_cancellations %>%
  mutate(aircraft_family = case_when(
    str_detect(aircraft_type, "777")   ~ "Boeing 777",
    str_detect(aircraft_type, "787")   ~ "Boeing 787",
    str_detect(aircraft_type, "737")   ~ "Boeing 737",
    str_detect(aircraft_type, "A320|A321|A319") ~ "Airbus A320 Family",
    str_detect(aircraft_type, "A330")  ~ "Airbus A330",
    str_detect(aircraft_type, "A340")  ~ "Airbus A340",
    str_detect(aircraft_type, "A350")  ~ "Airbus A350",
    TRUE ~ "Other"
  )) %>%
  count(aircraft_family, sort = TRUE) %>%
  mutate(pct = n / sum(n),
         aircraft_family = fct_reorder(aircraft_family, n)) %>%
  ggplot(aes(x = n, y = aircraft_family, fill = aircraft_family)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = paste0(n, " (", percent(pct, 1), ")")), hjust = -0.2, size = 3.8) +
  scale_fill_brewer(palette = "Set2") +
  # Leave room to the right of the longest bar so its "n (p%)" label is not
  # clipped at the panel edge.
  scale_x_continuous(expand = expansion(mult = c(0, 0.25))) +
  labs(title = "Affected Aircraft Types (Cancellations)",
       x = "Count", y = NULL) +
  theme_minimal(base_size = 13)


4. Airport Disruptions

4.1 Top Airports by Total Disruptions

# The 15 airports with the highest `total_disruptions`, each drawn as a
# stacked bar split into cancelled, delayed and diverted flights.
airport_disruptions |>
  arrange(desc(total_disruptions)) |>
  slice_head(n = 15) |>
  # Label as "Airport (IATA)" and order by the total before that column is
  # dropped by select().
  mutate(
    airport = fct_reorder(paste0(airport, " (", iata, ")"), total_disruptions)
  ) |>
  select(airport, flights_cancelled, flights_delayed, flights_diverted) |>
  pivot_longer(cols = -airport, names_to = "type", values_to = "count") |>
  # "flights_cancelled" -> "Flights Cancelled", matching the palette names.
  mutate(type = str_to_title(str_replace_all(type, "_", " "))) |>
  ggplot(mapping = aes(x = count, y = airport, fill = type)) +
  geom_col(position = "stack") +
  scale_fill_manual(
    values = c(
      "Flights Cancelled" = "#c0392b",
      "Flights Delayed"   = "#f0b429",
      "Flights Diverted"  = "#1a3a5c"
    )
  ) +
  labs(
    title = "Top 15 Airports by Total Disruptions",
    x     = "Flights",
    y     = NULL,
    fill  = "Disruption Type"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

4.2 Operational Status Breakdown

# Number of airports in each `status_category`, labelled with the count and
# its share of all airports.
airport_disruptions %>%
  count(status_category) %>%
  mutate(pct = n / sum(n),
         status_category = fct_reorder(status_category, n)) %>%
  ggplot(aes(x = n, y = status_category, fill = status_category)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = paste0(n, " airports (", percent(pct, 1), ")")),
            hjust = -0.1, size = 3.8) +
  scale_fill_manual(values = c(
    "Closed"      = "#c0392b",
    "Restricted"  = "#e07b39",
    "Advisory"    = "#f0b429",
    "Operational" = "#27ae60"
  )) +
  # The "n airports (p%)" labels are long; widen the axis on the right so the
  # label on the longest bar is not clipped at the panel edge.
  scale_x_continuous(expand = expansion(mult = c(0, 0.35))) +
  labs(title = "Airport Operational Status Distribution",
       x = "Number of Airports", y = NULL) +
  theme_minimal(base_size = 13)

4.3 Cancellations vs Delays by Airport Status

# Mean cancelled / delayed / diverted flights per airport within each
# operational status, shown as side-by-side bars.
airport_disruptions |>
  group_by(status_category) |>
  summarise(
    avg_cancelled = mean(flights_cancelled),
    avg_delayed   = mean(flights_delayed),
    avg_diverted  = mean(flights_diverted),
    .groups       = "drop"
  ) |>
  pivot_longer(cols = -status_category, names_to = "metric", values_to = "avg") |>
  # "avg_cancelled" -> "Avg Cancelled", matching the palette names.
  mutate(metric = str_to_title(str_replace(metric, "avg_", "Avg "))) |>
  ggplot(mapping = aes(x = status_category, y = avg, fill = metric)) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c(
      "Avg Cancelled" = "#c0392b",
      "Avg Delayed"   = "#f0b429",
      "Avg Diverted"  = "#1a3a5c"
    )
  ) +
  labs(
    title = "Average Disruptions by Operational Status",
    x     = "Status Category",
    y     = "Average Flights",
    fill  = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")


5. Airspace Closures

5.1 Closure Duration by Country

# Total hours of airspace closure per "country — region", coloured by
# duration tier and sorted by total hours closed.
airspace_closures %>%
  # Closures with no recorded duration would turn a whole group's sum() into
  # NA and remove its bar; drop them so the known durations still plot.
  # NOTE(review): this presumably hides still-open closures — confirm that is
  # acceptable for this chart.
  filter(!is.na(closure_duration_hours)) %>%
  mutate(country_region = paste0(country, " — ", region)) %>%
  group_by(country_region, duration_tier) %>%
  summarise(closure_duration_hours = sum(closure_duration_hours), .groups = "drop") %>%
  ggplot(aes(x = closure_duration_hours,
             # A country/region can have one row per tier; order by the sum so
             # the axis follows total bar length (reorder() defaults to mean).
             y = reorder(country_region, closure_duration_hours, FUN = sum),
             fill = duration_tier)) +
  geom_col() +
  scale_fill_manual(values = c(
    "Extended (100h+)"     = "#c0392b",
    "Prolonged (48-100h)"  = "#e07b39",
    "Significant (12-48h)" = "#f0b429",
    "Short (<12h)"         = "#aec6e8"
  )) +
  labs(title    = "Airspace Closure Duration by Country / FIR",
       subtitle = "Sorted by duration",
       x = "Hours Closed", y = NULL, fill = "Duration Tier") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

5.2 Closure Timeline (Gantt)

# Gantt-style timeline: one horizontal segment per closure from its start to
# its end time, coloured by duration tier. Rows are ordered so that the
# earliest-starting country/region appears at the top.
airspace_closures |>
  mutate(country_region = paste0(country, " — ", region)) |>
  ggplot(
    mapping = aes(
      xmin  = closure_start_time,
      xmax  = closure_end_time,
      y     = reorder(country_region, desc(closure_start_time)),
      color = duration_tier
    )
  ) +
  geom_linerange(linewidth = 4) +
  scale_x_datetime(date_labels = "%b %d", date_breaks = "1 day") +
  scale_color_manual(
    values = c(
      "Extended (100h+)"     = "#c0392b",
      "Prolonged (48-100h)"  = "#e07b39",
      "Significant (12-48h)" = "#f0b429",
      "Short (<12h)"         = "#aec6e8"
    )
  ) +
  labs(
    title    = "Airspace Closure Timeline by Country / FIR",
    subtitle = "Feb 28 – Mar 7, 2026",
    x        = NULL,
    y        = NULL,
    color    = "Duration Tier"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    axis.text.y     = element_text(size = 9),
    legend.position = "bottom"
  )


6. Airline Financial Impact

6.1 Daily Loss by Airline

# Estimated daily loss per airline as horizontal bars, largest loss on top,
# coloured by the airline's country. The x axis is shown in millions of USD.
airline_losses |>
  mutate(airline = fct_reorder(airline, estimated_daily_loss_usd)) |>
  ggplot(
    mapping = aes(x = estimated_daily_loss_usd, y = airline, fill = country)
  ) +
  geom_col() +
  scale_x_continuous(labels = label_dollar(suffix = "M", scale = 1e-6)) +
  labs(
    title = "Estimated Daily Financial Loss by Airline",
    x     = "Daily Loss (USD)",
    y     = NULL,
    fill  = "Country"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.text     = element_text(size = 8),
    legend.position = "right"
  )

6.2 Loss vs Passengers Impacted

# Scatter of estimated daily loss against passengers impacted, one point per
# row of `airline_losses`, coloured by country.
# `text` is not a ggplot2 aesthetic: a static plot silently ignores it. It is
# only used by plotly::ggplotly() to add the airline name to the hover tooltip.
p <- airline_losses %>%
  ggplot(aes(x = passengers_impacted, y = estimated_daily_loss_usd,
             color = country, text = airline)) +
  geom_point(size = 3, alpha = 0.85) +
  scale_y_continuous(labels = label_dollar(scale = 1e-6, suffix = "M")) +
  scale_x_continuous(labels = label_comma()) +
  labs(title = "Daily Loss vs Passengers Impacted",
       x = "Passengers Impacted", y = "Daily Loss (USD)", color = "Country") +
  theme_minimal(base_size = 13)

# Render interactively so the `text` mapping (and the plotly library loaded at
# the top of the file) is actually used; airline name is listed first.
# NOTE(review): assumes the report is knitted to HTML — confirm.
ggplotly(p, tooltip = c("text", "x", "y", "colour"))

6.3 Loss Per Passenger by Airline

# Daily loss per impacted passenger for each airline (rows with a missing
# value are skipped), highest on top, labelled with the dollar amount.
airline_losses |>
  filter(!is.na(loss_per_passenger_usd)) |>
  mutate(airline = fct_reorder(airline, loss_per_passenger_usd)) |>
  ggplot(
    mapping = aes(
      x    = loss_per_passenger_usd,
      y    = airline,
      fill = loss_per_passenger_usd
    )
  ) +
  geom_col(show.legend = FALSE) +
  geom_text(
    aes(label = dollar(loss_per_passenger_usd)),
    size  = 3.5,
    hjust = -0.2
  ) +
  # Extra space on the right keeps the end-of-bar labels inside the panel.
  scale_x_continuous(
    labels = label_dollar(),
    expand = expansion(mult = c(0, 0.15))
  ) +
  scale_fill_gradient(high = "#c0392b", low = "#aec6e8") +
  labs(
    title = "Estimated Daily Loss Per Passenger Impacted",
    x     = "USD per Passenger",
    y     = NULL
  ) +
  theme_minimal(base_size = 13)

6.4 Cancelled vs Rerouted Flights by Airline

# Side-by-side bars comparing cancelled and rerouted flight counts for each
# airline, with airlines ordered from most to fewest flights affected.
airline_losses |>
  select(airline, cancelled_flights, rerouted_flights) |>
  pivot_longer(cols = -airline, names_to = "type", values_to = "flights") |>
  mutate(
    type = if_else(type == "cancelled_flights", "Cancelled", "Rerouted")
  ) |>
  ggplot(mapping = aes(x = reorder(airline, -flights), y = flights, fill = type)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c(Cancelled = "#c0392b", Rerouted = "#1a3a5c")) +
  labs(
    title = "Cancelled vs Rerouted Flights by Airline",
    x     = NULL,
    y     = "Flights",
    fill  = NULL
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position = "top",
    # Airline names are long; slant them so they do not overlap.
    axis.text.x     = element_text(angle = 45, hjust = 1)
  )


7. Flight Reroutes

7.1 Additional Distance Distribution

# Histogram (100 km bins) of the extra distance flown by rerouted flights,
# with a dashed vertical line and a text label marking the mean.

# Compute the mean once and reuse it for the line, the label position and the
# label text. na.rm = TRUE keeps one missing distance from blanking all three.
mean_additional_km <- mean(flight_reroutes$additional_distance_km, na.rm = TRUE)

flight_reroutes %>%
  ggplot(aes(x = additional_distance_km)) +
  geom_histogram(binwidth = 100, fill = "#1a3a5c", color = "white") +
  geom_vline(xintercept = mean_additional_km,
             color = "#f0b429", linewidth = 1.2, linetype = "dashed") +
  annotate("text",
           x = mean_additional_km + 30,
           # Pin the label to the top of the panel instead of a hard-coded
           # count, so it stays in place whatever the tallest bin is.
           y = Inf, vjust = 1.5,
           label = paste0("Mean: ", round(mean_additional_km), " km"),
           color = "#f0b429", hjust = 0, size = 4) +
  labs(title = "Distribution of Additional Distance from Reroutes",
       x = "Additional Distance (km)", y = "Count") +
  theme_minimal(base_size = 13)

7.2 Top Reroutes by Additional Distance

# The ten reroutes that added the most distance. Each bar is labelled with
# airline and flight id on two lines, and shaded by the extra fuel cost.
flight_reroutes |>
  mutate(label = paste0(airline, "\n", flight_id)) |>
  arrange(desc(additional_distance_km)) |>
  slice_head(n = 10) |>
  mutate(label = fct_reorder(label, additional_distance_km)) |>
  ggplot(
    mapping = aes(
      x    = additional_distance_km,
      y    = label,
      fill = additional_fuel_cost_usd
    )
  ) +
  geom_col() +
  scale_x_continuous(labels = label_comma()) +
  # Legend values are shown in thousands of dollars.
  scale_fill_gradient(
    low    = "#aec6e8",
    high   = "#c0392b",
    labels = label_dollar(suffix = "K", scale = 1e-3)
  ) +
  labs(
    title    = "Top 10 Reroutes by Additional Distance",
    subtitle = "Color = additional fuel cost",
    x        = "Additional Distance (km)",
    y        = NULL,
    fill     = "Fuel Cost (USD)"
  ) +
  theme_minimal(base_size = 12)

7.3 Additional Distance vs Delay

# Scatter of delay against additional reroute distance, one point per flight,
# coloured by airline, with a single linear trend line over all flights.
# `text` is not a ggplot2 aesthetic; it only feeds the plotly hover tooltip.
p2 <- flight_reroutes %>%
  ggplot(aes(x = additional_distance_km, y = delay_minutes,
             color = airline, text = flight_id)) +
  geom_point(size = 3, alpha = 0.8) +
  # ggplot2 builds the implicit group from every discrete mapped variable, and
  # the inherited `text = flight_id` is one of them. Left alone, each group
  # holds a single point and no line can be fitted. group = 1 forces one fit
  # over all flights.
  geom_smooth(aes(group = 1), method = "lm", se = TRUE,
              color = "gray40", linewidth = 0.8) +
  scale_x_continuous(labels = label_comma()) +
  labs(title = "Additional Distance vs Delay Minutes",
       x = "Additional Distance (km)", y = "Delay (minutes)") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

# Render interactively so the flight id in `text` appears on hover.
# NOTE(review): assumes the report is knitted to HTML — confirm.
ggplotly(p2, tooltip = c("text", "x", "y", "colour"))

7.4 Fuel Cost per km by Airline

# Mean of `fuel_cost_per_km` across each airline's rerouted flights, drawn as
# horizontal bars (highest on top) and labelled to the cent.
flight_reroutes |>
  group_by(airline) |>
  summarise(avg_fuel_per_km = mean(fuel_cost_per_km), .groups = "drop") |>
  mutate(airline = fct_reorder(airline, avg_fuel_per_km)) |>
  ggplot(mapping = aes(x = avg_fuel_per_km, y = airline, fill = avg_fuel_per_km)) +
  geom_col(show.legend = FALSE) +
  geom_text(
    aes(label = dollar(avg_fuel_per_km, accuracy = 0.01)),
    size  = 3.5,
    hjust = -0.2
  ) +
  # Extra space on the right keeps the end-of-bar labels inside the panel.
  scale_x_continuous(
    labels = label_dollar(),
    expand = expansion(mult = c(0, 0.15))
  ) +
  scale_fill_gradient(high = "#c0392b", low = "#aec6e8") +
  labs(
    title = "Average Fuel Cost per Rerouted km by Airline",
    x     = "USD per km",
    y     = NULL
  ) +
  theme_minimal(base_size = 13)


8. Cross-Table Summary

8.1 Cancellations by Day Overlaid with Conflict Severity

# Cancellations per calendar day.
daily_cancellations <- flight_cancellations |>
  count(date, name = "cancellations")

# CRITICAL-severity, non-diplomatic conflict events per calendar day.
daily_critical <- conflict_events |>
  filter(!is_diplomatic) |>
  group_by(date) |>
  summarise(critical_events = sum(severity == "CRITICAL"), .groups = "drop")

# Dual-axis chart: bars show cancellations on the left axis, the dashed line
# shows critical events on the right axis. The event counts are multiplied by
# 5 for drawing and the secondary axis divides by 5 to undo that scaling.
daily_cancellations |>
  left_join(daily_critical, by = "date") |>
  # Days with cancellations but no conflict rows count as zero critical events.
  mutate(critical_events = replace_na(critical_events, 0)) |>
  ggplot(mapping = aes(x = date)) +
  geom_col(aes(y = cancellations), alpha = 0.8, fill = "#1a3a5c") +
  geom_line(
    aes(y = critical_events * 5),
    linetype  = "dashed",
    linewidth = 1.2,
    color     = "#c0392b"
  ) +
  geom_point(aes(y = critical_events * 5), size = 3, color = "#c0392b") +
  scale_x_date(date_labels = "%b %d", date_breaks = "1 day") +
  scale_y_continuous(
    name     = "Cancelled Flights",
    sec.axis = sec_axis(~ . / 5, name = "Critical Conflict Events")
  ) +
  labs(
    title    = "Daily Cancellations vs Critical Conflict Events",
    subtitle = "Red dashed = critical events (right axis) | Blue bars = cancellations (left axis)",
    x        = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text.y.right  = element_text(color = "#c0392b"),
    axis.title.y.right = element_text(color = "#c0392b")
  )

8.2 Total Impact Summary Table

# Headline figures pulled from every table, rendered as a two-column kable.
# `Value` mixes plain numbers with pre-formatted strings (dollar(), comma()),
# so c() coerces the whole column to character.
summary_table <- tibble(
  Metric = c(
    "Total flight cancellations",
    "Total flight reroutes",
    "Countries with airspace closures",
    "Airports disrupted",
    "Conflict events recorded",
    "Longest airspace closure (hours)",
    "Total est. daily airline losses (USD)",
    "Total passengers impacted",
    "Avg additional reroute distance (km)",
    "Avg reroute delay (minutes)"
  ),
  Value = c(
    nrow(flight_cancellations),
    nrow(flight_reroutes),
    n_distinct(airspace_closures$country),
    nrow(airport_disruptions),
    nrow(conflict_events),
    # At least one closure has a missing duration, and max() returns NA if any
    # input is NA. Ignore missing durations so the longest recorded closure is
    # reported instead of "NA".
    max(airspace_closures$closure_duration_hours, na.rm = TRUE),
    dollar(sum(airline_losses$estimated_daily_loss_usd)),
    comma(sum(airline_losses$passengers_impacted)),
    comma(round(mean(flight_reroutes$additional_distance_km))),
    round(mean(flight_reroutes$delay_minutes))
  )
)

knitr::kable(summary_table, align = c("l", "r"),
             caption = "Global Civil Aviation Disruption — Summary Statistics")
Global Civil Aviation Disruption — Summary Statistics
Metric Value
Total flight cancellations 50
Total flight reroutes 45
Countries with airspace closures 24
Airports disrupted 35
Conflict events recorded 28
Longest airspace closure (hours) NA
Total est. daily airline losses (USD) $48,840,000
Total passengers impacted 119,570
Avg additional reroute distance (km) 739
Avg reroute delay (minutes) 65

9. Session Info

# Record the R version, platform and package versions used for this render.
utils::sessionInfo()
## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plotly_4.12.0   scales_1.4.0    lubridate_1.9.4 forcats_1.0.1  
##  [5] stringr_1.5.2   dplyr_1.1.4     purrr_1.0.4     readr_2.1.5    
##  [9] tidyr_1.3.1     tibble_3.2.1    ggplot2_4.0.0   tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] utf8_1.2.4         sass_0.4.9         generics_0.1.4     lattice_0.21-8    
##  [5] stringi_1.8.7      hms_1.1.3          digest_0.6.37      magrittr_2.0.3    
##  [9] evaluate_1.0.5     grid_4.3.1         timechange_0.3.0   RColorBrewer_1.1-3
## [13] fastmap_1.2.0      Matrix_1.6-5       jsonlite_2.0.0     mgcv_1.8-42       
## [17] httr_1.4.7         viridisLite_0.4.2  lazyeval_0.2.2     jquerylib_0.1.4   
## [21] cli_3.6.4          rlang_1.1.5        crayon_1.5.3       splines_4.3.1     
## [25] bit64_4.6.0-1      withr_3.0.2        cachem_1.1.0       yaml_2.3.10       
## [29] parallel_4.3.1     tools_4.3.1        tzdb_0.5.0         vctrs_0.6.5       
## [33] R6_2.6.1           lifecycle_1.0.4    htmlwidgets_1.6.4  bit_4.6.0         
## [37] vroom_1.6.5        pkgconfig_2.0.3    pillar_1.11.1      bslib_0.9.0       
## [41] gtable_0.3.6       glue_1.8.0         data.table_1.17.0  xfun_0.52         
## [45] tidyselect_1.2.1   rstudioapi_0.17.1  knitr_1.50         farver_2.1.2      
## [49] nlme_3.1-162       htmltools_0.5.8.1  labeling_0.4.3     rmarkdown_2.30    
## [53] compiler_4.3.1     S7_0.2.0