knitr::opts_chunk$set(message = FALSE, warning = FALSE)
# Load packages for data import, cleaning, manipulation,
# visualization, and exploratory data analysis.
# Set a consistent plotting theme for all visualizations.
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
library(tidyverse)
library(scales)
library(ggrepel)
library(stringr)
library(GGally)
theme_set(theme_minimal())
Data Preprocessing
lpi <- read.csv( # Read the Living Planet Index dataset from a CSV file
"living-planet-index.csv",
sep = ";" # Specify semicolon as the column separator
)
colnames(lpi) <- c( # Assign meaningful names to the dataset columns
"Region",
"Year",
"AverageIndex",
"UpperIndex",
"LowerIndex"
)
lpi$Year <- as.numeric(lpi$Year) # Convert Year to numeric format
lpi$AverageIndex <- as.numeric(lpi$AverageIndex) # Convert AverageIndex to numeric format
lpi$UpperIndex <- as.numeric(lpi$UpperIndex) # Convert UpperIndex to numeric format
lpi$LowerIndex <- as.numeric(lpi$LowerIndex) # Convert LowerIndex to numeric format
species <- read.csv( # Load the species dataset
"Species.csv",
check.names = FALSE # Preserve original column names
)
species$Threatened <- as.numeric( # Convert threatened species counts to numeric format
gsub(",", "", species$`Subtotal (threatened spp.)`)
)
species$Total <- as.numeric( # Convert total species counts to numeric format
gsub(",", "", species$Total)
)
species$ThreatenedPercent <- # Calculate the percentage of threatened species
(species$Threatened / species$Total) * 100
species$EN <- as.numeric( # Convert Endangered species counts to numeric format
gsub(",", "", species$EN)
)
species$VU <- as.numeric( # Convert Vulnerable species counts to numeric format
gsub(",", "", species$VU)
)
species$CR <- as.numeric( # Convert Critically Endangered species counts to numeric format
gsub(",", "", species$CR)
)
species_clean <- species %>% # Select the top 20 countries by threatened species percentage
arrange(desc(ThreatenedPercent)) %>%
slice(1:20)
top15 <- species %>% # Select the top 15 countries with the highest threatened species counts
arrange(desc(Threatened)) %>%
slice(1:15)
species_long <- top15 %>% # Reshape the data into long format for visualization
select(Name, VU, EN, CR) %>%
pivot_longer(
cols = c(VU, EN, CR),
names_to = "ThreatLevel",
values_to = "Count"
)
protected <- read.csv( # Load the protected areas dataset
"161a1ce8-f2e3-46c2-b830-1f9149c4f66d_Data.csv"
)
protected <- protected %>%
select( # Select relevant columns for analysis
Country.Name,
X2024..YR2024.
) %>%
rename( # Rename columns for better readability
Country = Country.Name,
ProtectedArea = X2024..YR2024.
)
protected <- protected %>%
filter(ProtectedArea != "..") # Remove rows with missing values
protected$ProtectedArea <- as.numeric( # Convert protected area values to numeric format
protected$ProtectedArea
)
top_protected <- protected %>%
arrange(desc(ProtectedArea)) %>% # Sort countries by protected area coverage
slice(1:20) # Select the top 20 countries
df <- read_csv("Biodiversity habitat loss (Williams et al. 2021).csv") # Load the biodiversity habitat loss dataset
aggregates <- c("Europe", "Latin America", "North Africa", "North America",
"South and East Asia", "Sub-Saharan Africa", "World") # Define regional aggregate entries to exclude
clean_story <- df %>%
filter(!Entity %in% aggregates) %>% # Remove aggregate regions and keep individual countries
summarise(
Amphibians_BAU = mean(bau_habitat_loss_amphibians, na.rm = TRUE), # Calculate average habitat loss for amphibians under each scenario
Amphibians_Waste = mean(waste_habitat_loss_amphibians, na.rm = TRUE),
Amphibians_Diets = mean(diets_habitat_loss_amphibians, na.rm = TRUE),
Amphibians_Yields = mean(yields_habitat_loss_amphibians, na.rm = TRUE),
Amphibians_Combined = mean(combined_habitat_loss_amphibians, na.rm = TRUE),
Birds_BAU = mean(bau_habitat_loss_birds, na.rm = TRUE), # Calculate average habitat loss for birds under each scenario
Birds_Waste = mean(waste_habitat_loss_birds, na.rm = TRUE),
Birds_Diets = mean(diets_habitat_loss_birds, na.rm = TRUE),
Birds_Yields = mean(yields_habitat_loss_birds, na.rm = TRUE),
Birds_Combined = mean(combined_habitat_loss_birds, na.rm = TRUE),
Mammals_BAU = mean(bau_habitat_loss_mammals, na.rm = TRUE), # Calculate average habitat loss for mammals under each scenario
Mammals_Waste = mean(waste_habitat_loss_mammals, na.rm = TRUE),
Mammals_Diets = mean(diets_habitat_loss_mammals, na.rm = TRUE),
Mammals_Yields = mean(yields_habitat_loss_mammals, na.rm = TRUE),
Mammals_Combined = mean(combined_habitat_loss_mammals, na.rm = TRUE)
) %>%
pivot_longer(
cols = everything(),
names_to = c("Species", "Scenario"),
names_sep = "_" # Convert wide-format data into long format
) %>%
mutate(
Scenario = recode(Scenario, # Replace abbreviations with descriptive scenario names
"BAU" = "Business As Usual",
"Waste" = "Reduce Food Waste",
"Diets" = "Dietary Shifts",
"Yields" = "Improve Crop Yields",
"Combined" = "All Interventions Combined"),
Scenario = factor(Scenario, levels = c( # Define the display order of scenarios
"Business As Usual",
"Reduce Food Waste",
"Dietary Shifts",
"Improve Crop Yields",
"All Interventions Combined"
))
)
Chart 1: Wildlife Populations Over Time (since 1970)
global_lpi <- lpi %>%
group_by(Year) %>% # Group data by year
summarise(
AvgIndex = mean(
AverageIndex,
na.rm = TRUE # Calculate the average Living Planet Index for each year
)
)
p1 <- ggplot(
global_lpi,
aes(
x = Year,
y = AvgIndex,
text = paste(
"Year:", Year,
"<br>Index:", round(AvgIndex,1) # Create custom tooltip information
)
)
) +
geom_line(
linewidth = 1.3,
colour = "darkgreen" # Draw trend line showing index changes over time
) +
geom_point(size = 2) + # Add data points for each year
labs(
title = "Wildlife Populations Over Time(Since 1970)",
x = "Year",
y = "Living Planet Index" # Add chart title and axis labels
)
ggplotly(
p1,
tooltip = "text" # Convert the plot into an interactive visualization
)
Chart 2: Biodiversity Loss Across Regions
latest <- lpi %>%
filter(
Year == max(Year) # Select data from the most recent year
)
p2 <- ggplot(
latest,
aes(
reorder(
Region,
AverageIndex
), # Order regions by biodiversity index
AverageIndex,
text = paste(
Region,
"<br>Index:",
round(AverageIndex,1) # Create custom tooltip information
)
)
) +
geom_col(
fill = "steelblue" # Create a bar chart of regional biodiversity indices
) +
coord_flip() + # Flip coordinates for easier comparison of regions
labs(
title = "Biodiversity By Region",
x = "",
y = "Index" # Add chart title and axis labels
)
ggplotly(
p2,
tooltip = "text" # Convert the chart into an interactive visualization
)
Chart 3: Threat Composition Across Species Groups
species_heat <- species %>%
select(Name, CR, EN, VU) # Select species names and threat-level categories
species_heat$CR <- as.numeric(gsub(",", "", species_heat$CR)) # Convert CR values to numeric format
species_heat$EN <- as.numeric(gsub(",", "", species_heat$EN)) # Convert EN values to numeric format
species_heat$VU <- as.numeric(gsub(",", "", species_heat$VU)) # Convert VU values to numeric format
species_heat <- species_heat %>%
pivot_longer(
cols = c(CR, EN, VU),
names_to = "ThreatLevel",
values_to = "Count" # Reshape data into long format for heatmap visualization
)
top_species <- species %>%
mutate(
Threatened = as.numeric(
gsub(",", "", `Subtotal (threatened spp.)`) # Create numeric threatened species counts
)
) %>%
arrange(desc(Threatened)) %>% # Rank species groups by threatened counts
slice(1:15) # Select the top 15 most threatened groups
species_heat <- species_heat %>%
filter(Name %in% top_species$Name) # Keep only the selected top species groups
p3 <- ggplot(
species_heat,
aes(
x = ThreatLevel,
y = reorder(Name, Count),
fill = Count # Map threat counts to heatmap colors
)
) +
geom_tile() + # Create heatmap tiles
scale_fill_gradient(
low = "#fee8c8",
high = "#e34a33" # Apply color gradient based on threat counts
) +
labs(
title = "Threat Composition Across Species Groups"
) +
theme(
strip.text = element_text(size = 9),
plot.title = element_text(size = 11),
plot.subtitle = element_text(size = 7),
panel.spacing = unit(2, "lines"),
panel.grid.minor = element_blank() # Improve plot readability and appearance
)
ggplotly(
p3,
tooltip = c("x", "y") # Convert the heatmap into an interactive visualization
)
Chart 4: Countries Most In Need Of Conservation Action
priority_countries <- protected %>%
filter(!is.na(ProtectedArea)) %>% # Remove missing protected area values
arrange(ProtectedArea) %>% # Sort countries by lowest protected area first
slice(1:20) # Select 20 countries with least protection
p4 <- ggplot(
priority_countries,
aes(
x = reorder(Country, ProtectedArea),
y = ProtectedArea # Map country vs protected area percentage
)
) +
geom_segment(
aes(
xend = Country,
y = 0,
yend = ProtectedArea # Create lollipop chart stems
),
colour = "grey70"
) +
geom_point(
aes(
colour = ProtectedArea, # Color points based on protection level
size = ProtectedArea # Size points based on protection level
)
) +
coord_flip() + # Flip axes for better readability
labs(
title = "Countries Most In Need Of Conservation Action",
subtitle = "Countries with the smallest share of protected land",
x = "",
y = "Protected Area (%)" # Add titles and axis labels
) +
theme_minimal() + # Apply minimal theme for clean visualization
theme(
strip.text = element_text(size = 9),
plot.title = element_text(size = 11),
plot.subtitle = element_text(size = 7) # Adjust text styling for readability
)
ggplotly(
p4,
tooltip = c("x", "y") # Make plot interactive with tooltips
)
Chart 5: What Could Save Wildlife by 2050?
# Split data by animal class
data_amphibians <- clean_story %>% filter(Species == "Amphibians") # Extract amphibian habitat data
data_birds <- clean_story %>% filter(Species == "Birds") # Extract bird habitat data
data_mammals <- clean_story %>% filter(Species == "Mammals") # Extract mammal habitat data
# Color mapping matching performance hierarchy (Red to Green)
scenario_colors <- c(
"Business As Usual" = "#d73027",
"Reduce Food Waste" = "#f46d43",
"Dietary Shifts" = "#fdae61",
"Improve Crop Yields" = "#a6d96a",
"All Interventions Combined" = "#1a9850"
) # Define consistent color scale for scenarios
# 2. Build individual subplots
create_subplot <- function(sub_data, title_text) {
plot_ly(
data = sub_data,
y = ~Scenario,
x = ~value, # Map habitat change values to x-axis
type = 'bar',
orientation = 'h', # Horizontal bar chart
color = ~Scenario,
colors = scenario_colors, # Apply custom color palette
text = ~paste0(round(value, 2), "%"), # Display values on bars
textposition = 'outside',
textfont = list(size = 10),
hovertemplate = paste0(
"<b>%{y}</b><br>Habitat Change: %{x:.2f}%<extra></extra>" # Define hover tooltip format
),
showlegend = FALSE
) %>%
layout(
# FIX: Changed xref and yref to "paper" and adjusted coordinates
# This explicitly locks the heading right in the middle top of the column grid
annotations = list(
list(
x = 0.5, y = 1.05,
text = paste0("<b>", title_text, "</b>"),
xref = "paper", yref = "paper",
xanchor = "center", yanchor = "bottom", # Position subplot title
showarrow = FALSE,
font = list(size = 13, color = "black")
)
),
xaxis = list(
title = list(text = "Habitat Change (%)", standoff = 15), # Label x-axis
ticksuffix = "%"
),
yaxis = list(
title = "",
categoryorder = "array",
categoryarray = levels(sub_data$Scenario) # Maintain scenario order
),
shapes = list(
list(
type = "line",
x0 = 0, x1 = 0, y0 = -0.5, y1 = 4.5,
line = list(color = "grey40", width = 1.5, dash = "dash") # Add reference line at zero
)
)
)
}
p1 <- create_subplot(data_amphibians, "Amphibians") # Create amphibian subplot
p2 <- create_subplot(data_birds, "Birds") # Create bird subplot
p3 <- create_subplot(data_mammals, "Mammals") # Create mammal subplot
# 3. Combine subplots and set structural spacing layout
interactive_chart <- subplot(
p1, p2, p3,
nrows = 1,
shareY = TRUE,
titleX = TRUE,
margin = 0.06
) %>%
layout(
title = list(
text = "<b>Which Action Actually Saves Wildlife Habitats by 2050?</b><br><span style='font-size:11px;color:gray;'>Global average projected habitat change. Negative values indicate habitat destruction.</span>",
font = list(size = 14),
x = 0.02 # Position main title
),
margin = list(t = 120, b = 70, l = 180, r = 30), # Adjust overall plot spacing
barmode = 'group' # Group bars by scenario
)
# View chart
interactive_chart # Display final interactive visualization