# Load required packages
pacman::p_load(readr, dplyr, tidyverse, scales, grid, gridExtra, ggplot2, lubridate, tidyr, leaflet, ggcorrplot, GGally, reshape2)

# Load data
data <- read.csv("vietnam_climate_disaster_trend_by_decade.csv")

# Define caption for all plots
caption <- "Source: The International Disaster Database (EM-DAT)"

# Normalise metrics for stacked bar chart
metrics <- c("Event.Count", "Total.Affected", "Total.Damage.Adjusted..000.USD.", "Total.Deaths")
data_normalised <- data
for (metric in metrics) {
  max_val <- max(data[[metric]], na.rm = TRUE)
  data_normalised[[metric]] <- data[[metric]] / max_val
}

# Create stacked bar chart
data_melted <- melt(data_normalised, id.vars = "Decade", measure.vars = metrics)
stacked_bar <- ggplot(data_melted, aes(x = factor(Decade, levels = sort(unique(Decade))), y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  labs(title = "Normalised Disaster Metrics by Decade in Vietnam (1953-2024)",
       x = "Decade",
       y = "Normalised Value", caption = caption,
       fill = "Metric") +
  theme_minimal()
stacked_bar

# Load the dataset
disaster_data <- read_csv("disaster_in_vietnam.csv")
## Rows: 335 Columns: 46
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (22): DisNo., Historic, Classification Key, Disaster Group, Disaster Su...
## dbl  (20): AID Contribution ('000 US$), Magnitude, Latitude, Longitude, Star...
## lgl   (2): Reconstruction Costs ('000 US$), Reconstruction Costs, Adjusted (...
## date  (2): Entry Date, Last Update
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename columns for basic plots
data <- disaster_data %>%
  rename(
    Disaster_Type = `Disaster Type`,
    Start_Year = `Start Year`,
    Total_Deaths = `Total Deaths`,
    No_Affected = `No. Affected`,
    Total_Damage = `Total Damage ('000 US$)`
  )

# Summary of disaster types: total deaths and affected people
disaster_summary <- data %>%
  group_by(Disaster_Type) %>%
  summarise(
    Total_Deaths = sum(Total_Deaths, na.rm = TRUE),
    Total_Affected = sum(No_Affected, na.rm = TRUE)
  ) %>%
  arrange(desc(Total_Affected))  # Sort by number affected (or use desc(Total_Deaths) for sorting by deaths)

# Print summary table
print("Summary of Disaster Types: Total Affected and Total Deaths")
## [1] "Summary of Disaster Types: Total Affected and Total Deaths"
print(disaster_summary)
## # A tibble: 19 × 3
##    Disaster_Type                    Total_Deaths Total_Affected
##    <chr>                                   <dbl>          <dbl>
##  1 Storm                                   19727       51994789
##  2 Flood                                    6073       33137160
##  3 Drought                                     0        8545558
##  4 Epidemic                                 1167          28703
##  5 Fire (Miscellaneous)                      177           5000
##  6 Explosion (Industrial)                    114           4385
##  7 Fire (Industrial)                          18           3500
##  8 Water                                     609             75
##  9 Road                                      468              1
## 10 Air                                       269              0
## 11 Collapse (Industrial)                     453              0
## 12 Collapse (Miscellaneous)                   29              0
## 13 Explosion (Miscellaneous)                  47              0
## 14 Infestation                                 0              0
## 15 Mass movement (wet)                       330              0
## 16 Miscellaneous accident (General)           36              0
## 17 Poisoning                                 177              0
## 18 Rail                                       25              0
## 19 Wildfire                                    0              0
# Data Cleaning: Replace NA with 0 in key impact columns for advanced plots
disaster_data <- disaster_data %>%
  mutate(
    `Total Deaths` = replace(`Total Deaths`, is.na(`Total Deaths`), 0),
    `No. Injured` = replace(`No. Injured`, is.na(`No. Injured`), 0),
    `No. Affected` = replace(`No. Affected`, is.na(`No. Affected`), 0),
    `No. Homeless` = replace(`No. Homeless`, is.na(`No. Homeless`), 0),
    `Total Affected` = replace(`Total Affected`, is.na(`Total Affected`), 0),
    `Total Damage, Adjusted ('000 US$)` = replace(`Total Damage, Adjusted ('000 US$)`, is.na(`Total Damage, Adjusted ('000 US$)`), 0),
    `Insured Damage, Adjusted ('000 US$)` = replace(`Insured Damage, Adjusted ('000 US$)`, is.na(`Insured Damage, Adjusted ('000 US$)`), 0)
  )

# Add Decade column
disaster_data$Decade <- floor(disaster_data$`Start Year` / 10) * 10

# Prepare data for p1: Disaster frequency over time
disasters_by_year <- data %>%
  group_by(Start_Year) %>%
  summarise(Count = n())

# Create p1: Disaster frequency over time
p1 <- ggplot(disasters_by_year, aes(x = Start_Year, y = Count)) +
  geom_line(color = "blue") +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  labs(title = "Number of Disasters in Vietnam Over Time (1953-2024)", 
       subtitle = "Plot 1",
       x = "Year", y = "Number of Disasters", caption = caption) +
  theme_minimal()

# Prepare data for p2: Economic damage over time
damage_by_year <- disaster_data %>%
  group_by(`Start Year`) %>%
  summarise(Total_Damage_Adjusted = sum(`Total Damage, Adjusted ('000 US$)`, na.rm = TRUE) / 1000)  # Convert to millions

# Create p2: Economic damage over time
p2 <- ggplot(damage_by_year, aes(x = `Start Year`, y = Total_Damage_Adjusted)) +
  geom_line(color = "blue") +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  labs(title = "Adjusted Total Economic Damage from Disasters in Vietnam (1953-2024)",
       subtitle = "Plot 2",
       x = "Year", y = "Total Damage (Millions USD, Adjusted)", caption = caption) +
  theme_minimal() +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "M"))

# Prepare data for p3: Distribution of disaster types
disaster_types <- data %>%
  group_by(Disaster_Type) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count))

# Create p3: Distribution of disaster types
p3 <- ggplot(disaster_types, aes(x = reorder(Disaster_Type, -Count), y = Count)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(title = "Distribution of Disaster Types in Vietnam (1953-2024)", 
       subtitle = "Plot 3",
       x = "Disaster Type", y = "Count", caption = caption) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Prepare data for p4: Total deaths by disaster type
deaths_by_type <- data %>%
  group_by(Disaster_Type) %>%
  summarise(Total_Deaths_Sum = sum(Total_Deaths, na.rm = TRUE)) %>%
  arrange(desc(Total_Deaths_Sum))

# Create p4: Total deaths by disaster type
p4 <- ggplot(deaths_by_type, aes(x = reorder(Disaster_Type, -Total_Deaths_Sum), y = Total_Deaths_Sum)) +
  geom_bar(stat = "identity", fill = "darkred") +
  labs(title = "Total Deaths by Disaster Type in Vietnam (1953-2024)",
       subtitle = "Plot 4",
       x = "Disaster Type", y = "Total Deaths", caption = caption) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Prepare data for p5: Impact by disaster type
impact_by_type <- disaster_data %>%
  group_by(`Disaster Type`) %>%
  summarise(
    Total_Deaths = sum(`Total Deaths`, na.rm = TRUE),
    Total_Affected = sum(`Total Affected`, na.rm = TRUE) / 1000,  # Convert to thousands
    Total_Damage_Adjusted = sum(`Total Damage, Adjusted ('000 US$)`, na.rm = TRUE) / 1000  # Convert to millions
  ) %>%
  pivot_longer(cols = c(Total_Deaths, Total_Affected, Total_Damage_Adjusted), 
               names_to = "Impact", 
               values_to = "Value")

# Create p5: Impact by disaster type
p5 <- ggplot(impact_by_type, aes(x = `Disaster Type`, y = Value, fill = Impact)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Impact of Disasters by Type in Vietnam (1953-2024)",
    subtitle = "Plot 5",
    x = "Disaster Type",
    y = "Value (Deaths, Thousands Affected, Millions USD)",
    caption = caption,
    fill = "Impact Type"
  ) +
  scale_y_continuous(labels = comma)

# Prepare data for p6: Disasters by month and type
disasters_by_month <- disaster_data %>%
  filter(!is.na(`Start Month`)) %>%
  group_by(`Start Month`, `Disaster Type`) %>%
  summarise(Count = n()) %>%
  ungroup()
## `summarise()` has grouped output by 'Start Month'. You can override using the
## `.groups` argument.
# Create p6: Disasters by month and type
p6 <- ggplot(disasters_by_month, aes(x = `Start Month`, y = Count, fill = `Disaster Type`)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(
    title = "Disasters in Vietnam by Month and Type (1953-2024)",
    subtitle = "Plot 6",
    x = "Month",
    y = "Number of Disasters",
    caption = caption,
    fill = "Disaster Type"
  ) +
  scale_x_continuous(breaks = 1:12, labels = month.abb)

# Create p7: Geographic distribution (Leaflet map)
geo_data <- disaster_data %>%
  filter(!is.na(Latitude) & !is.na(Longitude)) %>%
  mutate(Popup = paste(`Disaster Type`, "<br>", 
                       `Event Name`, "<br>", 
                       `Start Year`, "<br>", 
                       "Deaths:", `Total Deaths`, "<br>", 
                       "Affected:", `Total Affected`))

p7 <- leaflet(data = geo_data) %>%
  addTiles() %>%
  addCircles(
    lng = ~Longitude, 
    lat = ~Latitude, 
    radius = ~sqrt(`Total Affected`) * 1000,  # Scale radius by total affected
    popup = ~Popup,
    color = ~ifelse(`Disaster Group` == "Natural", "blue", "red"),
    fillOpacity = 0.5
  ) %>%
  addLegend(
    position = "bottomright",
    colors = c("blue", "red"),
    labels = c("Natural", "Technological"),
    title = "Disaster Group"
  )

# Prepare data for p8: Correlation heatmap
cor_data <- disaster_data %>%
  select(`Total Deaths`, `Total Affected`, `Total Damage, Adjusted ('000 US$)`) %>%
  filter(complete.cases(.))

cor_matrix <- cor(cor_data)

print("Correlation Matrix: Total Deaths, Total Affected, Total Damage; from Disasters in Vietnam (1953-2024)")
## [1] "Correlation Matrix: Total Deaths, Total Affected, Total Damage; from Disasters in Vietnam (1953-2024)"
print(cor_matrix)
##                                   Total Deaths Total Affected
## Total Deaths                         1.0000000      0.1166218
## Total Affected                       0.1166218      1.0000000
## Total Damage, Adjusted ('000 US$)    0.0840843      0.2032369
##                                   Total Damage, Adjusted ('000 US$)
## Total Deaths                                              0.0840843
## Total Affected                                            0.2032369
## Total Damage, Adjusted ('000 US$)                         1.0000000
# Create p8: Correlation heatmap
p8 <- ggcorrplot(cor_matrix, lab = TRUE, title = "Correlation Heatmap (Plot 8): Deaths, Affected, Damage; from Disasters in Vietnam (1950-2024)") +
                                         labs(caption = caption)

# Create p9: Scatter plot matrix
# Define custom functions for scatter plot matrix
custom_points <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) + 
    geom_point() +
    scale_x_continuous(labels = scales::comma) +
    scale_y_continuous(labels = scales::comma)
}

custom_density <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) + 
    geom_density() +
    scale_x_continuous(labels = scales::comma)
}

# Create p9: Scatter plot matrix with formatted axes
p9 <- ggpairs(cor_data, 
              lower = list(continuous = custom_points),
              diag = list(continuous = custom_density),
              title = "Scatter Plot Matrix (Plot 9): Disaster Impacts; from Disasters in Vietnam (1953-2024)") +
              labs(caption = caption) +
  theme_minimal()

# Correlation heatmaps by decade (Plot 8b)
# Check the number of observations per decade
decade_counts <- disaster_data %>%
  group_by(Decade) %>%
  summarise(n = n()) %>%
  filter(n >= 10)  # Only include decades with at least 10 observations

# Function to compute melted correlation matrix
compute_cor_melted <- function(data) {
  cor_mat <- cor(select(data, `Total Deaths`, `Total Affected`, `Total Damage, Adjusted ('000 US$)`), use = "complete.obs")
  cor_melted <- melt(cor_mat)
  return(cor_melted)
}

# Compute correlation matrices for each decade with sufficient data
cor_list <- disaster_data %>%
  filter(Decade %in% decade_counts$Decade) %>%
  group_by(Decade) %>%
  group_map(~compute_cor_melted(.x) %>% mutate(Decade = .y$Decade))

# Combine into a single data frame
cor_df <- do.call(rbind, cor_list)

# Create faceted heatmap
p8_decade <- ggplot(cor_df, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = round(value, 2)), color = "black", size = 3) +
  facet_wrap(~Decade) +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limits = c(-1, 1)) +
  labs(title = "Correlation Heatmaps by Decade (Plot 8b): Deaths, Affected, Damage; from Disasters in Vietnam (1980-2024)", 
       x = "", y = "", fill = "Correlation", caption = caption) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


# Combine plots using grid.arrange

grid.arrange(p1, p2, nrow = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#grid.arrange(p3, p4, nrow = 2)
#grid.arrange(p5, p6, nrow = 2)

# Display standalone plots
# Distribution of disaster types
print(p3)

# Total deaths by disaster type
print(p4)

# Impact by disaster type
print(p5)

# Disasters by month and type
print(p6)

# Geographic distribution (Leaflet map)
p7
# p8: Correlation heat map
print(p8)

# Faceted correlation heatmaps by decade
print(p8_decade)

# Scatter plot matrix
print(p9)