knitr::opts_chunk$set(message = FALSE, warning = FALSE)
# Load packages for data import, cleaning, manipulation,
# visualization, and exploratory data analysis.
# Set a consistent plotting theme for all visualizations.

library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
library(tidyverse)
library(scales)
library(ggrepel)
library(stringr)
library(GGally)

theme_set(theme_minimal())

Data Preprocessing

lpi <- read.csv(                 # Read the Living Planet Index dataset from a CSV file
  "living-planet-index.csv",
  sep = ";"                      # Specify semicolon as the column separator
)

colnames(lpi) <- c(              # Assign meaningful names to the dataset columns
  "Region",
  "Year",
  "AverageIndex",
  "UpperIndex",
  "LowerIndex"
)

lpi$Year <- as.numeric(lpi$Year)                     # Convert Year to numeric format
lpi$AverageIndex <- as.numeric(lpi$AverageIndex)     # Convert AverageIndex to numeric format
lpi$UpperIndex <- as.numeric(lpi$UpperIndex)         # Convert UpperIndex to numeric format
lpi$LowerIndex <- as.numeric(lpi$LowerIndex)         # Convert LowerIndex to numeric format
species <- read.csv(                     # Load the species dataset
  "Species.csv",
  check.names = FALSE                    # Preserve original column names
)

species$Threatened <- as.numeric(        # Convert threatened species counts to numeric format
  gsub(",", "", species$`Subtotal (threatened spp.)`)
)

species$Total <- as.numeric(             # Convert total species counts to numeric format
  gsub(",", "", species$Total)
)

species$ThreatenedPercent <-             # Calculate the percentage of threatened species
  (species$Threatened / species$Total) * 100

species$EN <- as.numeric(                # Convert Endangered species counts to numeric format
  gsub(",", "", species$EN)
)

species$VU <- as.numeric(                # Convert Vulnerable species counts to numeric format
  gsub(",", "", species$VU)
)

species$CR <- as.numeric(                # Convert Critically Endangered species counts to numeric format
  gsub(",", "", species$CR)
)

species_clean <- species %>%             # Select the top 20 countries by threatened species percentage
  arrange(desc(ThreatenedPercent)) %>%
  slice(1:20)

top15 <- species %>%                     # Select the top 15 countries with the highest threatened species counts
  arrange(desc(Threatened)) %>%
  slice(1:15)

species_long <- top15 %>%                # Reshape the data into long format for visualization
  select(Name, VU, EN, CR) %>%
  pivot_longer(
    cols = c(VU, EN, CR),
    names_to = "ThreatLevel",
    values_to = "Count"
  )
protected <- read.csv(                           # Load the protected areas dataset
  "161a1ce8-f2e3-46c2-b830-1f9149c4f66d_Data.csv"
)

protected <- protected %>%
  select(                                        # Select relevant columns for analysis
    Country.Name,
    X2024..YR2024.
  ) %>%
  rename(                                        # Rename columns for better readability
    Country = Country.Name,
    ProtectedArea = X2024..YR2024.
  )

protected <- protected %>%
  filter(ProtectedArea != "..")                  # Remove rows with missing values

protected$ProtectedArea <- as.numeric(           # Convert protected area values to numeric format
  protected$ProtectedArea
)

top_protected <- protected %>%
  arrange(desc(ProtectedArea)) %>%               # Sort countries by protected area coverage
  slice(1:20)                                    # Select the top 20 countries
df <- read_csv("Biodiversity habitat loss (Williams et al. 2021).csv")   # Load the biodiversity habitat loss dataset

aggregates <- c("Europe", "Latin America", "North Africa", "North America", 
                "South and East Asia", "Sub-Saharan Africa", "World")    # Define regional aggregate entries to exclude

clean_story <- df %>%
  filter(!Entity %in% aggregates) %>%                                    # Remove aggregate regions and keep individual countries
  summarise(
    Amphibians_BAU = mean(bau_habitat_loss_amphibians, na.rm = TRUE),     # Calculate average habitat loss for amphibians under each scenario
    Amphibians_Waste = mean(waste_habitat_loss_amphibians, na.rm = TRUE),
    Amphibians_Diets = mean(diets_habitat_loss_amphibians, na.rm = TRUE),
    Amphibians_Yields = mean(yields_habitat_loss_amphibians, na.rm = TRUE),
    Amphibians_Combined = mean(combined_habitat_loss_amphibians, na.rm = TRUE),
    
    Birds_BAU = mean(bau_habitat_loss_birds, na.rm = TRUE),               # Calculate average habitat loss for birds under each scenario
    Birds_Waste = mean(waste_habitat_loss_birds, na.rm = TRUE),
    Birds_Diets = mean(diets_habitat_loss_birds, na.rm = TRUE),
    Birds_Yields = mean(yields_habitat_loss_birds, na.rm = TRUE),
    Birds_Combined = mean(combined_habitat_loss_birds, na.rm = TRUE),
    
    Mammals_BAU = mean(bau_habitat_loss_mammals, na.rm = TRUE),           # Calculate average habitat loss for mammals under each scenario
    Mammals_Waste = mean(waste_habitat_loss_mammals, na.rm = TRUE),
    Mammals_Diets = mean(diets_habitat_loss_mammals, na.rm = TRUE),
    Mammals_Yields = mean(yields_habitat_loss_mammals, na.rm = TRUE),
    Mammals_Combined = mean(combined_habitat_loss_mammals, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = everything(),
    names_to = c("Species", "Scenario"),
    names_sep = "_"                                                       # Convert wide-format data into long format
  ) %>%
  mutate(
    Scenario = recode(Scenario,                                           # Replace abbreviations with descriptive scenario names
                      "BAU" = "Business As Usual", 
                      "Waste" = "Reduce Food Waste", 
                      "Diets" = "Dietary Shifts", 
                      "Yields" = "Improve Crop Yields", 
                      "Combined" = "All Interventions Combined"),
    Scenario = factor(Scenario, levels = c(                              # Define the display order of scenarios
      "Business As Usual", 
      "Reduce Food Waste", 
      "Dietary Shifts", 
      "Improve Crop Yields", 
      "All Interventions Combined"
    ))
  )

Chart 1: Wildlife Populations Over Time (since 1970)

global_lpi <- lpi %>%
  group_by(Year) %>%                                # Group data by year
  summarise(
    AvgIndex = mean(
      AverageIndex,
      na.rm = TRUE                                  # Calculate the average Living Planet Index for each year
    )
  )

p1 <- ggplot(
  global_lpi,
  aes(
    x = Year,
    y = AvgIndex,
    text = paste(
      "Year:", Year,
      "<br>Index:", round(AvgIndex,1)               # Create custom tooltip information
    )
  )
) +
  geom_line(
    linewidth = 1.3,
    colour = "darkgreen"                            # Draw trend line showing index changes over time
  ) +
  geom_point(size = 2) +                            # Add data points for each year
  labs(
    title = "Wildlife Populations Over Time(Since 1970)",
    x = "Year",
    y = "Living Planet Index"                       # Add chart title and axis labels
  )

ggplotly(
  p1,
  tooltip = "text"                                  # Convert the plot into an interactive visualization
)

Chart 2: Biodiversity Loss Across Regions

latest <- lpi %>%
  filter(
    Year == max(Year)                               # Select data from the most recent year
  )

p2 <- ggplot(
  latest,
  aes(
    reorder(
      Region,
      AverageIndex
    ),                                              # Order regions by biodiversity index
    AverageIndex,
    text = paste(
      Region,
      "<br>Index:",
      round(AverageIndex,1)                         # Create custom tooltip information
    )
  )
) +
  geom_col(
    fill = "steelblue"                              # Create a bar chart of regional biodiversity indices
  ) +
  coord_flip() +                                    # Flip coordinates for easier comparison of regions
  labs(
    title = "Biodiversity By Region",
    x = "",
    y = "Index"                                     # Add chart title and axis labels
  )

ggplotly(
  p2,
  tooltip = "text"                                  # Convert the chart into an interactive visualization
)

Chart 3: Threat Composition Across Species Groups

species_heat <- species %>%
  select(Name, CR, EN, VU)                          # Select species names and threat-level categories

species_heat$CR <- as.numeric(gsub(",", "", species_heat$CR))   # Convert CR values to numeric format
species_heat$EN <- as.numeric(gsub(",", "", species_heat$EN))   # Convert EN values to numeric format
species_heat$VU <- as.numeric(gsub(",", "", species_heat$VU))   # Convert VU values to numeric format

species_heat <- species_heat %>%
  pivot_longer(
    cols = c(CR, EN, VU),
    names_to = "ThreatLevel",
    values_to = "Count"                             # Reshape data into long format for heatmap visualization
  )

top_species <- species %>%
  mutate(
    Threatened = as.numeric(
      gsub(",", "", `Subtotal (threatened spp.)`)   # Create numeric threatened species counts
    )
  ) %>%
  arrange(desc(Threatened)) %>%                     # Rank species groups by threatened counts
  slice(1:15)                                       # Select the top 15 most threatened groups

species_heat <- species_heat %>%
  filter(Name %in% top_species$Name)                # Keep only the selected top species groups

p3 <- ggplot(
  species_heat,
  aes(
    x = ThreatLevel,
    y = reorder(Name, Count),
    fill = Count                                    # Map threat counts to heatmap colors
  )
) +
  geom_tile() +                                     # Create heatmap tiles
  scale_fill_gradient(
    low = "#fee8c8",
    high = "#e34a33"                                # Apply color gradient based on threat counts
  ) +
  labs(
    title = "Threat Composition Across Species Groups"
  ) +
  theme(
    strip.text = element_text(size = 9),
    plot.title = element_text(size = 11),
    plot.subtitle = element_text(size = 7),
    panel.spacing = unit(2, "lines"),
    panel.grid.minor = element_blank()              # Improve plot readability and appearance
  )

ggplotly(
  p3,
  tooltip = c("x", "y")                             # Convert the heatmap into an interactive visualization
)

Chart 4: Countries Most In Need Of Conservation Action

priority_countries <- protected %>%
  filter(!is.na(ProtectedArea)) %>%                    # Remove missing protected area values
  arrange(ProtectedArea) %>%                          # Sort countries by lowest protected area first
  slice(1:20)                                          # Select 20 countries with least protection

p4 <- ggplot(
  priority_countries,
  aes(
    x = reorder(Country, ProtectedArea),
    y = ProtectedArea                                  # Map country vs protected area percentage
  )
) +

  geom_segment(
    aes(
      xend = Country,
      y = 0,
      yend = ProtectedArea                            # Create lollipop chart stems
    ),
    colour = "grey70"
  ) +

  geom_point(
    aes(
      colour = ProtectedArea,                         # Color points based on protection level
      size = ProtectedArea                            # Size points based on protection level
    )
  ) +

  coord_flip() +                                      # Flip axes for better readability

  labs(
    title = "Countries Most In Need Of Conservation Action",
    subtitle = "Countries with the smallest share of protected land",
    x = "",
    y = "Protected Area (%)"                         # Add titles and axis labels
  ) +

  theme_minimal() +                                   # Apply minimal theme for clean visualization
  theme(
    strip.text = element_text(size = 9),
    plot.title = element_text(size = 11),
    plot.subtitle = element_text(size = 7)            # Adjust text styling for readability
  )

ggplotly(
  p4,
  tooltip = c("x", "y")                               # Make plot interactive with tooltips
)

Chart 5: What Could Save Wildlife by 2050?

# Split data by animal class
data_amphibians <- clean_story %>% filter(Species == "Amphibians")   # Extract amphibian habitat data
data_birds      <- clean_story %>% filter(Species == "Birds")        # Extract bird habitat data
data_mammals    <- clean_story %>% filter(Species == "Mammals")      # Extract mammal habitat data

# Color mapping matching performance hierarchy (Red to Green)
scenario_colors <- c(
  "Business As Usual" = "#d73027", 
  "Reduce Food Waste" = "#f46d43", 
  "Dietary Shifts" = "#fdae61", 
  "Improve Crop Yields" = "#a6d96a", 
  "All Interventions Combined" = "#1a9850"
)                                                                  # Define consistent color scale for scenarios

# 2. Build individual subplots
create_subplot <- function(sub_data, title_text) {
  plot_ly(
    data = sub_data,
    y = ~Scenario,
    x = ~value,                                                    # Map habitat change values to x-axis
    type = 'bar',
    orientation = 'h',                                             # Horizontal bar chart
    color = ~Scenario,
    colors = scenario_colors,                                      # Apply custom color palette
    text = ~paste0(round(value, 2), "%"),                          # Display values on bars
    textposition = 'outside',
    textfont = list(size = 10), 
    hovertemplate = paste0(
      "<b>%{y}</b><br>Habitat Change: %{x:.2f}%<extra></extra>"   # Define hover tooltip format
    ),
    showlegend = FALSE
  ) %>%
    layout(
      # FIX: Changed xref and yref to "paper" and adjusted coordinates
      # This explicitly locks the heading right in the middle top of the column grid
      annotations = list(
        list(
          x = 0.5, y = 1.05, 
          text = paste0("<b>", title_text, "</b>"),
          xref = "paper", yref = "paper", 
          xanchor = "center", yanchor = "bottom",                  # Position subplot title
          showarrow = FALSE, 
          font = list(size = 13, color = "black")
        )
      ),
      xaxis = list(
        title = list(text = "Habitat Change (%)", standoff = 15),   # Label x-axis
        ticksuffix = "%"
      ),
      yaxis = list(
        title = "", 
        categoryorder = "array", 
        categoryarray = levels(sub_data$Scenario)                  # Maintain scenario order
      ),
      shapes = list(
        list(
          type = "line", 
          x0 = 0, x1 = 0, y0 = -0.5, y1 = 4.5, 
          line = list(color = "grey40", width = 1.5, dash = "dash") # Add reference line at zero
        )
      )
    )
}

p1 <- create_subplot(data_amphibians, "Amphibians")   # Create amphibian subplot
p2 <- create_subplot(data_birds, "Birds")             # Create bird subplot
p3 <- create_subplot(data_mammals, "Mammals")         # Create mammal subplot

# 3. Combine subplots and set structural spacing layout
interactive_chart <- subplot(
  p1, p2, p3,
  nrows = 1,
  shareY = TRUE,
  titleX = TRUE,
  margin = 0.06
) %>%
  layout(
    title = list(
      text = "<b>Which Action Actually Saves Wildlife Habitats by 2050?</b><br><span style='font-size:11px;color:gray;'>Global average projected habitat change. Negative values indicate habitat destruction.</span>",
      font = list(size = 14),
      x = 0.02                                                   # Position main title
    ),
    margin = list(t = 120, b = 70, l = 180, r = 30),             # Adjust overall plot spacing
    barmode = 'group'                                             # Group bars by scenario
  )

# View chart
interactive_chart                                                # Display final interactive visualization