DATA 608 Story 5

Load the data

This code loads and integrates global storm data with temperature records, mapping basin codes to their corresponding names for better interpretability and analysis.

# Ensure necessary libraries are loaded
library(dplyr)

# Read the storm and temperature data.
# na.strings = "" keeps the literal basin code "NA" (North Atlantic) as text.
# With read.csv()'s default (na.strings = "NA") that code is read as a missing
# value and can never match the "NA" entry of a basin lookup table.
storm_data_world <- read.csv(
  "ibtracs.ALL.list.v04r01.csv",
  na.strings = ""
)
temperature_data <- read.csv("temperature_data.csv")

# Convert SEASON to numeric in storm_data_world.
# Some SEASON entries are not numbers (presumably the units row that follows
# the header in the IBTrACS CSV -- TODO confirm). They become NA here and are
# dropped explicitly, so the coercion warning is silenced for this one call.
season_numeric <- suppressWarnings(as.numeric(storm_data_world$SEASON))
storm_data_world <- storm_data_world %>%
  mutate(SEASON = season_numeric) %>%
  filter(!is.na(SEASON))

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `SEASON = as.numeric(SEASON)`.
## Caused by warning:
## ! NAs introduced by coercion

# Attach the yearly temperature record to every storm observation by matching
# the storm SEASON to the temperature table's Year. inner_join() keeps only
# seasons present in both tables; the join key is then renamed to `year`.
storm_data_world <- storm_data_world %>%
  inner_join(temperature_data, by = c("SEASON" = "Year")) %>%
  rename(year = SEASON)

# Lookup from basin code to readable basin name. Codes and names are written
# as pairs so the two columns cannot drift out of step.
# NOTE(review): "SP" was added from the IBTrACS basin list; "AU" is kept from
# the original mapping -- confirm both against the dataset documentation.
basin_names <- c(
  "NA" = "North Atlantic",
  "EP" = "Eastern Pacific",
  "WP" = "Western Pacific",
  "SI" = "South Indian",
  "SP" = "South Pacific",
  "AU" = "Australia",
  "NI" = "Northern Indian",
  "SA" = "South Atlantic"
)
basin_mapping <- data.frame(
  BASIN = names(basin_names),
  BASIN_NAME = unname(basin_names),
  stringsAsFactors = FALSE
)

# Add the basin names to the storm_data_world dataset; observations whose
# code is not in the lookup keep their row and get BASIN_NAME = NA.
storm_data_world <- storm_data_world %>%
  left_join(basin_mapping, by = "BASIN")

#glimpse(storm_data_world)

Create a data frame for max wind and speed for each storm

The following code builds the data frame max_wind_data, which holds the highest wind speed (USA_WIND) and the highest storm speed (STORM_SPEED) recorded for each unique storm ID (SID):

# Ensure USA_WIND and STORM_SPEED are numeric; entries that cannot be parsed
# as numbers become NA.
storm_data_world <- storm_data_world %>%
  mutate(across(c(USA_WIND, STORM_SPEED), as.numeric))

# Largest non-missing value of x, or NA when every value is missing.
# A plain if/else is used because the test is a single TRUE/FALSE, and
# NA_real_ keeps the result numeric for every group.
max_or_na <- function(x) {
  if (all(is.na(x))) {
    NA_real_
  } else {
    max(x, na.rm = TRUE)
  }
}

# One row per storm (SID) with its peak wind and peak storm speed.
max_wind_data <- storm_data_world %>%
  group_by(SID) %>%
  summarise(
    Max_Wind = max_or_na(USA_WIND),
    Max_Storm_Speed = max_or_na(STORM_SPEED),
    .groups = "drop"
  )

# Display the first few rows of max_wind_data
head(max_wind_data)

## # A tibble: 6 × 3
##   SID           Max_Wind Max_Storm_Speed
##   <chr>            <dbl>           <dbl>
## 1 1851080S15062       NA               8
## 2 1851080S15063       NA               8
## 3 1851080S21060       NA              11
## 4 1851080S21420       NA              12
## 5 1851175N26270       80              10
## 6 1851181N19275       NA               8

Visualizing Global Temperature Anomalies Over Time

This code visualizes the trend in global temperature anomalies, combining raw data and a smoothed line to highlight long-term climatic changes. The gray line represents yearly anomalies, while the red line smooths the data to reveal underlying patterns.

library(ggplot2)
library(dplyr)
library(zoo) # For rolling mean

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

# One temperature anomaly per year, sorted by year. The sort matters: the
# rolling mean below averages neighbouring rows, so the rows must be in
# chronological order for the window to cover consecutive years.
anomaly_data <- storm_data_world %>%
  distinct(year, Anomaly) %>%
  arrange(year) %>%
  mutate(
    # Centred 10-row moving average; rows too close to either end of the
    # series have no complete window and are filled with NA.
    Smoothed_Anomaly = rollmean(Anomaly, k = 10, fill = NA, align = "center")
  )

# Plot the raw yearly anomalies (gray) with the smoothed series (red) on top.
# `linewidth` replaces the `size` aesthetic that ggplot2 3.4.0 deprecated
# for lines.
ggplot(anomaly_data, aes(x = year)) +
  geom_line(aes(y = Anomaly), color = "gray", linewidth = 0.8, alpha = 0.7) +
  geom_line(aes(y = Smoothed_Anomaly), color = "red", linewidth = 0.5) +
  labs(
    title = "Global Temperature Anomalies Over Time",
    x = "Year",
    y = "Temperature Anomaly (°C)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).

Impact of Temperature Anomalies on Storm Speed and Intensity Over Time

This visualization examines the relationship between temperature anomalies, storm speed, and storm intensity over the years. Using scaled and smoothed data, it reveals how rising global temperatures have impacted cyclonic storm characteristics. The blue line shows smoothed storm speeds, the green line represents storm intensity, and the red line captures smoothed temperature anomalies. The primary y-axis reflects storm metrics, while the secondary y-axis displays temperature anomalies in °C.

library(ggplot2)
library(dplyr)
library(zoo) # For rolling mean

# Per-storm peaks: for every storm (SID) and year, the largest storm speed and
# the largest USA_WIND among track records that report both values.
storm_data <- storm_data_world %>%
  filter(!is.na(STORM_SPEED), !is.na(USA_WIND)) %>%
  mutate(
    STORM_SPEED = as.numeric(STORM_SPEED),
    USA_WIND = as.numeric(USA_WIND)
  ) %>%
  group_by(SID, year) %>%
  summarise(
    Max_Storm_Speed = max(STORM_SPEED, na.rm = TRUE),
    Max_USA_Wind = max(USA_WIND, na.rm = TRUE),
    .groups = "drop"
  )

# Yearly peaks: the largest per-storm value seen in each year.
yearly_data <- storm_data %>%
  group_by(year) %>%
  summarise(
    Max_Storm_Speed = max(Max_Storm_Speed, na.rm = TRUE),
    Max_USA_Wind = max(Max_USA_Wind, na.rm = TRUE),
    .groups = "drop"
  )

# One temperature anomaly per year
anomaly_data <- storm_data_world %>%
  distinct(year, Anomaly)

# Keep only years that have an anomaly value
combined_data <- yearly_data %>%
  left_join(anomaly_data, by = "year") %>%
  filter(!is.na(Anomaly))

# Standardise each series (mean 0, sd 1) so they can share one axis, then
# smooth with a centred 10-year moving average. scale() returns a one-column
# matrix, so it is flattened with as.numeric() to keep ordinary numeric
# columns in the data frame.
combined_data <- combined_data %>%
  mutate(
    Scaled_Storm_Speed = as.numeric(scale(Max_Storm_Speed)),
    Scaled_USA_Wind = as.numeric(scale(Max_USA_Wind)),
    Scaled_Anomaly = as.numeric(scale(Anomaly)),
    Smoothed_Storm_Speed = rollmean(
      Scaled_Storm_Speed, k = 10, fill = NA, align = "center"
    ),
    Smoothed_USA_Wind = rollmean(
      Scaled_USA_Wind, k = 10, fill = NA, align = "center"
    ),
    Smoothed_Anomaly = rollmean(
      Scaled_Anomaly, k = 10, fill = NA, align = "center"
    )
  )

# Primary-axis ticks: choose round values on the original Max_Storm_Speed
# scale, then convert them to the standardised scale used for plotting.
storm_speed_ticks <- pretty(combined_data$Max_Storm_Speed)
scaled_storm_speed_ticks <-
  (storm_speed_ticks - mean(combined_data$Max_Storm_Speed)) /
  sd(combined_data$Max_Storm_Speed)

# Plot the three smoothed series. The primary axis is labelled with original
# Max_Storm_Speed values; the secondary axis converts the standardised scale
# back to anomaly values.
# NOTE(review): the primary tick labels are derived from Max_Storm_Speed only,
# so they do not give wind values for the "Storm Intensity" line, and the
# "(km/h)" unit in the axis title should be confirmed against the dataset.
ggplot(combined_data, aes(x = year)) +
  geom_line(
    aes(y = Smoothed_Storm_Speed, color = "Storm Speed"),
    linewidth = 0.5
  ) +
  geom_line(
    aes(y = Smoothed_USA_Wind, color = "Storm Intensity"),
    linewidth = 0.5
  ) +
  geom_line(
    aes(y = Smoothed_Anomaly, color = "Temperature Anomaly"),
    linewidth = 0.5
  ) +
  scale_y_continuous(
    name = "Storm Speed and Intensity (km/h)",
    breaks = scaled_storm_speed_ticks,
    labels = storm_speed_ticks,
    sec.axis = sec_axis(
      ~ . * sd(combined_data$Anomaly) + mean(combined_data$Anomaly),
      name = "Temperature Anomaly (°C)"
    )
  ) +
  scale_color_manual(
    values = c(
      "Storm Speed" = "blue",
      "Storm Intensity" = "green",
      "Temperature Anomaly" = "red"
    ),
    name = "Metrics"
  ) +
  labs(
    title = "Impact of Temperature Anomalies on Storm Speed and Storm Intensity Over Time",
    x = "Year"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top"
  )

## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).

Relationship Between Temperature Anomaly and Annual Maximum Wind Speed Over Time

This visualization highlights the connection between rising temperature anomalies and maximum annual wind speeds. The blue curve represents smoothed annual maximum wind speed (in knots), while the red curve reflects smoothed temperature anomalies (in °C). The primary y-axis is calibrated for wind speeds, and the secondary y-axis corresponds to temperature anomalies, using scaled values for the plot but displaying the actual values for tick labels. This dual-axis representation emphasizes the potential influence of global warming on the intensity of cyclonic storms over the years.

library(ggplot2)
library(dplyr)
library(zoo) # For rolling mean

# Highest recorded USA_WIND in each year. Taking the maximum over all track
# records of a year gives the same value as first taking each storm's maximum
# and then the maximum of those, so the per-storm step is not needed.
max_wind_per_year <- storm_data_world %>%
  filter(!is.na(USA_WIND)) %>%
  mutate(USA_WIND = as.numeric(USA_WIND)) %>%
  group_by(year) %>%
  summarise(Annual_Max_Wind = max(USA_WIND, na.rm = TRUE), .groups = "drop")

# One temperature anomaly per year
anomaly_data <- storm_data_world %>%
  distinct(year, Anomaly)

# Combine wind speed and anomaly data, keeping years that have an anomaly
combined_data <- max_wind_per_year %>%
  left_join(anomaly_data, by = "year") %>%
  filter(!is.na(Anomaly))

# Standardise both series (mean 0, sd 1) and smooth them with a centred
# 10-year moving average. as.numeric() flattens the one-column matrix that
# scale() returns into an ordinary numeric column.
combined_data <- combined_data %>%
  mutate(
    Scaled_Max_Wind = as.numeric(scale(Annual_Max_Wind)),
    Scaled_Anomaly = as.numeric(scale(Anomaly)),
    Smoothed_Max_Wind = rollmean(
      Scaled_Max_Wind, k = 10, fill = NA, align = "center"
    ),
    Smoothed_Anomaly = rollmean(
      Scaled_Anomaly, k = 10, fill = NA, align = "center"
    )
  )

# Primary-axis ticks: round values on the original wind scale, converted to
# the standardised scale used for plotting.
max_wind_ticks <- pretty(combined_data$Annual_Max_Wind)
scaled_max_wind_ticks <-
  (max_wind_ticks - mean(combined_data$Annual_Max_Wind)) /
  sd(combined_data$Annual_Max_Wind)

# Plot both smoothed series; the secondary axis converts the standardised
# scale back to anomaly values.
ggplot(combined_data, aes(x = year)) +
  geom_line(
    aes(y = Smoothed_Max_Wind, color = "Max Wind Speed (Smoothed)"),
    linewidth = 0.5
  ) +
  geom_line(
    aes(y = Smoothed_Anomaly, color = "Temperature Anomaly (Smoothed)"),
    linewidth = 0.5
  ) +
  scale_y_continuous(
    name = "Annual Maximum Wind Speed (knots)",
    breaks = scaled_max_wind_ticks,
    labels = max_wind_ticks,
    sec.axis = sec_axis(
      ~ . * sd(combined_data$Anomaly) + mean(combined_data$Anomaly),
      name = "Temperature Anomaly (°C)"
    )
  ) +
  scale_color_manual(
    values = c(
      "Max Wind Speed (Smoothed)" = "blue",
      "Temperature Anomaly (Smoothed)" = "red"
    ),
    name = "Metric"
  ) +
  labs(
    title = "Relationship Between Temperature Anomaly and Annual Maximum Wind Speed Over Time",
    x = "Year"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top"
  )

## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).

Number of Storms Making Landfall Over the Years

This visualization shows the yearly trend of storms making landfall, derived from storm data where landfall is explicitly marked or determined based on distance to land. The blue line represents the number of storms that made landfall each year, and the red smoothed line highlights the overall trend over time. This analysis provides insights into the increasing frequency of landfalling storms and their potential implications on coastal regions.

library(dplyr)
library(ggplot2)

# Storms counted as making landfall: any track record where both LANDFALL and
# DIST2LAND equal 0, or where USA_RECORD contains the letter "L". Each storm
# is kept once per year, however many of its records qualify.
storms_with_landfall <- storm_data_world %>%
  filter(
    (LANDFALL == 0 & DIST2LAND == 0) |
      grepl("L", USA_RECORD, fixed = TRUE)
  ) %>%
  distinct(SID, year)

# Number of landfalling storms per year
landfall_count_per_year <- storms_with_landfall %>%
  count(year, name = "Landfall_Count")

# Yearly counts (blue) with a loess trend (red). `linewidth` replaces the
# `size` aesthetic that ggplot2 3.4.0 deprecated for lines; the smoothing
# formula is spelled out so geom_smooth() does not have to report its default.
ggplot(landfall_count_per_year, aes(x = year, y = Landfall_Count)) +
  geom_line(color = "blue", linewidth = 0.7) +
  geom_smooth(
    method = "loess",
    formula = y ~ x,
    color = "red",
    linewidth = 0.5
  ) +
  labs(
    title = "Number of Storms Making Landfall Over the Years",
    x = "Year",
    y = "Number of Storms"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.major = element_line(color = "gray90")
  )

## `geom_smooth()` using formula = 'y ~ x'

Destructive Potential Index (DPI) for storms

In this analysis, we calculate and visualize the Destructive Potential Index (DPI) for storms categorized 1 to 5. The DPI combines multiple storm characteristics to assess the potential destructiveness of storms. By aggregating the DPI values across all storms in a year, we aim to observe broader trends in destructive potential over time.

Formula

The DPI formula is defined as:

\[ DPI = (0.3 \times \text{Normalized Storm Speed}) + (0.7 \times \text{Normalized Wind Speed} \times \text{Land Proximity Factor}) \]

Explanation of Each Term:

Normalized Storm Speed:
- The translational speed of the storm (how fast it moves).
- Normalized to a [0,1] scale.
Normalized Wind Speed:
- The maximum sustained wind speed within the storm.
- Normalized to a [0,1] scale.
Land Proximity Factor:
- Calculated as \(\frac{1}{\text{DIST2LAND} + 1}\), where DIST2LAND is the distance of the storm to the nearest landmass (in kilometers).
- Gives higher weight to storms closer to land due to increased potential for damage.

Weights:

0.3 for Normalized Storm Speed: Moderate impact on destructiveness.
0.7 for Normalized Wind Speed: Stronger determinant of destructiveness.
Land Proximity Factor: Multiplies the wind-speed contribution, so strong winds count for more the closer a storm is to land.

Code

Conclusion

This analysis shows how the Total Destructive Potential Index (DPI) has varied over time, reflecting the cumulative destructive potential of storms each year. The inclusion of storm speed, wind speed, and land proximity provides a comprehensive measure of destructiveness.

Analysis of the plot

This visualization demonstrates the Total Destructive Potential Index (DPI) aggregated yearly for storms categorized from 1 to 5 in the dataset. A key observation from the plot is:

Upward Trend: The smoothed DPI line (purple) suggests an overall increase in the total DPI over time. This indicates that the cumulative destructive potential of storms has generally risen since the mid-19th century.
Fluctuations: While the DPI shows year-to-year variability, several spikes highlight years with highly destructive storms. These spikes could correspond to major storm events or seasons with multiple impactful storms.
Recent Decades: In the latter half of the 20th century and early 21st century, the DPI appears to have reached higher peaks compared to earlier periods, aligning with the increase in storm intensity and frequency reported in recent studies.

This analysis highlights the importance of considering both natural variability and potential climate-related factors influencing storm behavior over time.

library(ggplot2)
library(dplyr)
library(zoo) # For moving average

# Min-max normalisation: rescale x linearly so its smallest non-missing value
# maps to 0 and its largest to 1. Missing values stay missing.
#
# x: a numeric vector.
# Returns a numeric vector of the same length as x.
#
# Edge cases:
# - every value missing (or x empty): returns all-NA without computing a range,
#   which would otherwise raise "no non-missing arguments" warnings;
# - all non-missing values equal: the range is zero, so instead of dividing by
#   zero (NaN) every non-missing value maps to 0.
normalize <- function(x) {
  if (all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }
  bounds <- range(x, na.rm = TRUE)
  span <- bounds[2] - bounds[1]
  if (span == 0) {
    return(ifelse(is.na(x), NA_real_, 0))
  }
  (x - bounds[1]) / span
}

# Calculate a DPI value for every track record that has storm speed, wind
# speed and distance to land. Normalisation uses the min and max over all of
# these records. The land-proximity factor multiplies only the wind term; the
# parentheses make that grouping explicit.
# NOTE(review): no storm-category filter is applied here, although the text
# describes storms categorised 1 to 5 -- confirm which is intended.
storm_dpi <- storm_data_world %>%
  filter(!is.na(STORM_SPEED), !is.na(USA_WIND), !is.na(DIST2LAND)) %>%
  mutate(
    STORM_SPEED = as.numeric(STORM_SPEED),
    USA_WIND = as.numeric(USA_WIND),
    DIST2LAND = as.numeric(DIST2LAND),
    Norm_Storm_Speed = normalize(STORM_SPEED),
    Norm_Wind_Speed = normalize(USA_WIND),
    Land_Proximity_Factor = 1 / (DIST2LAND + 1),
    DPI = (0.3 * Norm_Storm_Speed) +
      (0.7 * Norm_Wind_Speed * Land_Proximity_Factor)
  )

# Total DPI per year: the sum over all of that year's track records, so years
# with more records accumulate a larger total.
annual_dpi <- storm_dpi %>%
  group_by(year) %>%
  summarise(
    Total_DPI = sum(DPI, na.rm = TRUE),
    .groups = "drop"
  )

# One temperature anomaly per year
anomaly_data <- storm_data_world %>%
  distinct(year, Anomaly)

# Combine DPI and anomaly data, keeping years that have an anomaly
combined_dpi_data <- annual_dpi %>%
  left_join(anomaly_data, by = "year") %>%
  filter(!is.na(Anomaly))

# Standardise both series (mean 0, sd 1) and smooth them with a centred
# 10-year moving average. as.numeric() flattens the one-column matrix that
# scale() returns into an ordinary numeric column.
combined_dpi_data <- combined_dpi_data %>%
  mutate(
    Scaled_DPI = as.numeric(scale(Total_DPI)),
    Scaled_Anomaly = as.numeric(scale(Anomaly)),
    Smoothed_DPI = rollmean(
      Scaled_DPI, k = 10, fill = NA, align = "center"
    ),
    Smoothed_Anomaly = rollmean(
      Scaled_Anomaly, k = 10, fill = NA, align = "center"
    )
  )

# Primary-axis ticks: round values on the original DPI scale, converted to
# the standardised scale used for plotting.
dpi_ticks <- pretty(combined_dpi_data$Total_DPI)
scaled_dpi_ticks <-
  (dpi_ticks - mean(combined_dpi_data$Total_DPI)) /
  sd(combined_dpi_data$Total_DPI)

# Plot both smoothed series; the secondary axis converts the standardised
# scale back to anomaly values.
ggplot(combined_dpi_data, aes(x = year)) +
  geom_line(
    aes(y = Smoothed_DPI, color = "DPI (Smoothed)"),
    linewidth = 0.5
  ) +
  geom_line(
    aes(y = Smoothed_Anomaly, color = "Temperature Anomaly (Smoothed)"),
    linewidth = 0.5
  ) +
  scale_y_continuous(
    name = "Total DPI",
    breaks = scaled_dpi_ticks,
    labels = dpi_ticks,
    sec.axis = sec_axis(
      ~ . * sd(combined_dpi_data$Anomaly) + mean(combined_dpi_data$Anomaly),
      name = "Temperature Anomaly (°C)"
    )
  ) +
  scale_color_manual(
    values = c(
      "DPI (Smoothed)" = "purple",
      "Temperature Anomaly (Smoothed)" = "red"
    ),
    name = "Metric"
  ) +
  labs(
    title = "Temperature Anomaly and Annual DPI Over Time",
    x = "Year"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top"
  )

## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).

Temperature Anomaly vs Cyclone Frequency

This scatterplot examines the relationship between global temperature anomalies and cyclone frequency. Each black point represents a year’s average temperature anomaly plotted against the total number of cyclones that occurred during that year. The red line represents a linear regression trend, illustrating the relationship between the two variables. This visualization highlights how changes in Earth’s temperature anomalies may influence the frequency of cyclonic storms.

# Yearly cyclone frequency and mean temperature anomaly.
# Frequency is the number of distinct storm IDs in the year. Counting rows
# with n() would count track records instead, so a storm with many records
# would be counted many times.
cyclone_frequency <- storm_data_world %>%
  group_by(year) %>%
  summarise(
    Avg_Temperature_Anomaly = mean(Anomaly, na.rm = TRUE),
    Cyclone_Frequency = n_distinct(SID),
    .groups = "drop"
  )

# Scatter of yearly frequency against anomaly with a straight-line fit.
# `linewidth` replaces the `size` aesthetic that ggplot2 3.4.0 deprecated
# for lines; the formula is spelled out so geom_smooth() does not have to
# report its default.
ggplot(
  cyclone_frequency,
  aes(x = Avg_Temperature_Anomaly, y = Cyclone_Frequency)
) +
  geom_point(color = "black", alpha = 0.6) +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    color = "red",
    se = FALSE,
    linewidth = 0.5
  ) +
  labs(
    title = "Temperature Anomaly vs Cyclone Frequency",
    x = "Temperature Anomaly (°C)",
    y = "Cyclone Frequency"
  ) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

Temperature Anomaly and Cyclone Frequency Over Time

This line graph compares smoothed trends in cyclone frequency and temperature anomalies over the years. The blue line represents the smoothed cyclone frequency, while the red line shows the smoothed temperature anomaly. The primary y-axis represents cyclone frequency with its original values manually labeled, while the secondary y-axis depicts temperature anomalies in degrees Celsius. This visualization illustrates the correlation between rising global temperatures and changes in cyclone activity.

library(ggplot2)
library(dplyr)
library(zoo) # For moving averages

# Yearly cyclone frequency: the number of distinct storm IDs per year.
# Counting rows with n() would count track records, not storms.
cyclone_frequency <- storm_data_world %>%
  group_by(year) %>%
  summarise(Cyclone_Frequency = n_distinct(SID), .groups = "drop")

# One temperature anomaly per year
anomaly_data <- storm_data_world %>%
  distinct(year, Anomaly)

# Combine cyclone frequency and anomaly data, keeping years with an anomaly
combined_data <- cyclone_frequency %>%
  left_join(anomaly_data, by = "year") %>%
  filter(!is.na(Anomaly))

# Standardise both series (mean 0, sd 1) and smooth them with a centred
# 10-year moving average. as.numeric() flattens the one-column matrix that
# scale() returns into an ordinary numeric column.
combined_data <- combined_data %>%
  mutate(
    Scaled_Frequency = as.numeric(scale(Cyclone_Frequency)),
    Scaled_Anomaly = as.numeric(scale(Anomaly)),
    Smoothed_Frequency = rollmean(
      Scaled_Frequency, k = 10, fill = NA, align = "center"
    ),
    Smoothed_Anomaly = rollmean(
      Scaled_Anomaly, k = 10, fill = NA, align = "center"
    )
  )

# Primary-axis ticks: round values on the original frequency scale, converted
# to the standardised scale with the data's own mean and sd. Standardising
# the tick vector by itself would use the wrong mean and sd.
frequency_ticks <- pretty(combined_data$Cyclone_Frequency)
scaled_frequency_ticks <-
  (frequency_ticks - mean(combined_data$Cyclone_Frequency)) /
  sd(combined_data$Cyclone_Frequency)

# Plot both smoothed series; the secondary axis converts the standardised
# scale back to anomaly values.
ggplot(combined_data, aes(x = year)) +
  geom_line(
    aes(y = Smoothed_Frequency, color = "Cyclone Frequency (Smoothed)"),
    linewidth = 0.5
  ) +
  geom_line(
    aes(y = Smoothed_Anomaly, color = "Temperature Anomaly (Smoothed)"),
    linewidth = 0.5
  ) +
  scale_y_continuous(
    name = "Cyclone Frequency",
    breaks = scaled_frequency_ticks,
    labels = frequency_ticks,
    sec.axis = sec_axis(
      ~ . * sd(combined_data$Anomaly) + mean(combined_data$Anomaly),
      name = "Temperature Anomaly (°C)"
    )
  ) +
  scale_color_manual(
    values = c(
      "Cyclone Frequency (Smoothed)" = "blue",
      "Temperature Anomaly (Smoothed)" = "red"
    ),
    name = "Metric"
  ) +
  labs(
    title = "Temperature Anomaly and Cyclone Frequency Over Time",
    x = "Year"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top"
  )

## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).

Rising Earth’s Temperature and Its Impact on Cyclonic Storm Metrics

This visualization effectively links rising Earth’s temperatures with the increased frequency, intensity, speed, and destructiveness of cyclonic storms over time. The heatmap illustrates how temperature anomalies correlate with these escalating storm metrics, emphasizing the broader impacts of climate change.

# Add a DPI column to storm_data_world itself.
# This overwrites storm_data_world with only the track records that have
# storm speed, wind speed and distance to land, so everything computed from
# it afterwards uses that reduced set of rows.
# Min-max scaling goes through the normalize() helper defined earlier in this
# file, so the formula is written in one place only.
storm_data_world <- storm_data_world %>%
  filter(!is.na(STORM_SPEED), !is.na(USA_WIND), !is.na(DIST2LAND)) %>%
  mutate(
    STORM_SPEED = as.numeric(STORM_SPEED),
    USA_WIND = as.numeric(USA_WIND),
    DIST2LAND = as.numeric(DIST2LAND),
    Norm_Storm_Speed = normalize(STORM_SPEED),
    Norm_Wind_Speed = normalize(USA_WIND),
    # Closer storms get a higher weight
    Land_Proximity_Factor = 1 / (DIST2LAND + 1),
    # The proximity factor scales the wind term only
    DPI = (0.3 * Norm_Storm_Speed) +
      (0.7 * Norm_Wind_Speed * Land_Proximity_Factor)
  )

# Now, storm_data_world contains a DPI column for each record

# Load required libraries
library(ggplot2)
library(dplyr)
library(tidyr)

# Yearly summary of each metric: peak DPI, number of distinct storms, peak
# storm speed and peak wind speed, plus the year's mean anomaly.
# NOTE(review): Avg_Anomaly is carried through the reshaping below but is not
# mapped to anything in the heatmap -- confirm whether it should be shown.
max_metrics_per_year <- storm_data_world %>%
  group_by(year) %>%
  summarise(
    Max_DPI = max(DPI, na.rm = TRUE),
    Max_Storm_Frequency = n_distinct(SID),
    Max_Storm_Speed = max(STORM_SPEED, na.rm = TRUE),
    Max_Wind_Speed = max(USA_WIND, na.rm = TRUE),
    Avg_Anomaly = mean(Anomaly, na.rm = TRUE),
    .groups = "drop"
  )

# Standardise every metric (mean 0, sd 1) so they share one colour scale,
# then reshape to one row per year and metric. as.numeric() flattens the
# one-column matrix that scale() returns, so pivot_longer() stacks plain
# numeric vectors into Value. The metric names are then made readable.
scaled_data <- max_metrics_per_year %>%
  mutate(
    Scaled_DPI = as.numeric(scale(Max_DPI)),
    Scaled_Storm_Frequency = as.numeric(scale(Max_Storm_Frequency)),
    Scaled_Storm_Speed = as.numeric(scale(Max_Storm_Speed)),
    Scaled_Wind_Speed = as.numeric(scale(Max_Wind_Speed))
  ) %>%
  select(year, starts_with("Scaled"), Avg_Anomaly) %>%
  pivot_longer(
    cols = starts_with("Scaled"),
    names_to = "Metric",
    values_to = "Value"
  ) %>%
  mutate(
    Metric = recode(
      Metric,
      "Scaled_DPI" = "DPI",
      "Scaled_Storm_Frequency" = "Storm Frequency",
      "Scaled_Storm_Speed" = "Storm Speed",
      "Scaled_Wind_Speed" = "Wind Speed"
    )
  )

# Heatmap: one tile per year and metric, blue below the metric's mean, white
# at the mean and red above it.
ggplot(scaled_data, aes(x = Metric, y = year, fill = Value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0, name = "Scaled Value"
  ) +
  labs(
    title = "Rising earth's temperature has led to increase in frequency, \nintensity, speed and destructiveness of cyclonic storms",
    x = "Metrics",
    y = "Year"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, face = "bold")
  )