library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
Step 1. Import the data
# Load the CO2 trend scores from disk; show_col_types = FALSE silences the
# column-specification message that read_csv() would otherwise print
co2 <- readr::read_csv(
  file = "co2_trend_scores.csv",
  show_col_types = FALSE
)
# Show the first six rows to check the columns and their types
head(co2, n = 6)
## # A tibble: 6 × 5
## ISO_A3 gas trend_type score_type score
## <chr> <chr> <chr> <chr> <dbl>
## 1 ESP co2 Long trend Weighted -9.51
## 2 GBR co2 Long trend Weighted 1.97
## 3 PRT co2 Long trend Weighted 0.742
## 4 BES co2 Long trend Weighted 0
## 5 UKR co2 Long trend Weighted -27.5
## 6 ZWE co2 Long trend Weighted 50.9
Step 2. Data preparation
# Lower-case every column name so the rest of the script can rely on a single
# spelling (e.g. ISO_A3 becomes iso_a3)
co2 <- setNames(co2, tolower(names(co2)))
# Count the missing cells across the whole table and report the total
total_na <- sum(is.na(co2))
paste0("Total missing values in the dataset: ", total_na)
## [1] "Total missing values in the dataset: 2"
Step 3. Guiding question:
# Question: Which countries (by ISO_A3) show the highest CO₂ trend scores, and which 5 countries are experiencing the steepest recent declines in their CO₂ emissions?
Step 4. Summarize with dplyr
# Mean score for each country code, sorted from highest to lowest.
# Missing scores are ignored when averaging.
# NOTE(review): rows are not filtered on score_type or trend_type here, so the
# mean pools every row available for a country — confirm that is intended.
co2_country <- co2 %>%
  group_by(iso_a3) %>%
  summarise(
    avg_score = mean(score, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_score))
# Mean score and row count for each trend_type, highest mean first
co2_trendtype <- co2 %>%
  group_by(trend_type) %>%
  summarise(
    avg_score = mean(score, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_score))
# Show the ten countries with the highest mean score
slice_head(co2_country, n = 10)
## # A tibble: 10 × 2
## iso_a3 avg_score
## <chr> <dbl>
## 1 MDV 100
## 2 SMR 100
## 3 GUM 97.6
## 4 SGP 51.9
## 5 GUY 50.8
## 6 GGY 50
## 7 JEY 50
## 8 NFK 50
## 9 WLF 49.1
## 10 TCA 42.2
co2_trendtype
## # A tibble: 2 × 3
## trend_type avg_score n
## <chr> <dbl> <int>
## 1 Recent changes 0.951 253
## 2 Long trend -5.57 253
Step 5. Key Insight 1 - Top 5 countries with the highest long-term weighted
scores
# List the raw trend_type labels to see exactly how they are spelled
unique(co2$trend_type)
## [1] "Long trend" "Recent changes"
library(stringr)
# Five countries with the highest mean score on the long-term trend.
# trend_type is lower-cased and trimmed inside the pipeline so the filter does
# not depend on the capitalisation or stray whitespace of the raw labels.
co2_top5 <- co2 %>%
  mutate(trend_type = str_trim(tolower(trend_type))) %>%
  filter(trend_type == "long trend", gas == "co2") %>%
  group_by(iso_a3) %>%
  summarize(avg_score = mean(score, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_score)) %>%
  slice_head(n = 5)
if (nrow(co2_top5) > 0) {
  countries1 <- paste(co2_top5$iso_a3, collapse = ", ")
  # cat() separates its arguments with a space by default; sep = "" avoids
  # doubled spaces inside the sentence and a stray space before the period
  cat(
    "Insight 1: Looking at long-term CO₂ emission trends, ",
    "the five countries with the highest average weighted scores are: ",
    countries1, ".\n",
    sep = ""
  )
} else {
  cat("Insight 1: No rows matched the filter for long-term CO₂ emission trends in this dataset.\n")
}
## Insight 1: Looking at long-term CO₂ emission trends, the five countries with the highest average weighted scores are: MDV, NFK, SMR, GUM, WLF.
Step 6. Key Insight 2 - The 5 countries with the most concerning recent
declines in CO₂ trends
# Tabulate the gas and trend_type labels exactly as stored; string matching is
# sensitive to both case and surrounding whitespace
dplyr::count(co2, gas, sort = TRUE)
## # A tibble: 1 × 2
## gas n
## <chr> <int>
## 1 co2 506
dplyr::count(co2, trend_type, sort = TRUE)
## # A tibble: 2 × 2
## trend_type n
## <chr> <int>
## 1 Long trend 253
## 2 Recent changes 253
# Normalise the three label columns in place: strip surrounding whitespace and
# lower-case, so later filters can compare against plain lower-case strings
co2 <- co2 %>%
  dplyr::mutate(
    # gas additionally gets the Unicode subscript two (U+2082) replaced by "2"
    gas = tolower(trimws(gsub("\u2082", "2", gas))),
    dplyr::across(
      c(trend_type, score_type),
      function(label) tolower(trimws(label))
    )
  )
# Re-tabulate to confirm the labels after normalisation
dplyr::count(co2, gas, sort = TRUE)
## # A tibble: 1 × 2
## gas n
## <chr> <int>
## 1 co2 506
dplyr::count(co2, trend_type, sort = TRUE)
## # A tibble: 2 × 2
## trend_type n
## <chr> <int>
## 1 long trend 253
## 2 recent changes 253
# Five countries with the lowest (most negative) mean score on the recent
# trend. Rows are restricted to CO2 and the "recent changes" trend type, scores
# are averaged per country (ignoring NAs), and the result is sorted ascending.
co2_recent_declines <- co2 %>%
  filter(gas == "co2", trend_type == "recent changes") %>%
  group_by(iso_a3) %>%
  summarise(
    avg_score = mean(score, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(avg_score) %>%
  slice_head(n = 5)
# Print the resulting five-row table
co2_recent_declines
## # A tibble: 5 × 2
## iso_a3 avg_score
## <chr> <dbl>
## 1 MAF -100
## 2 SPM -100
## 3 SXM -100
## 4 D07 -97.6
## 5 BHR -73.3
# Print Insight 2: name the five countries held in co2_recent_declines, or
# report that the filter matched nothing.
if (exists("co2_recent_declines") && nrow(co2_recent_declines) > 0) {
  countries <- paste(co2_recent_declines$iso_a3, collapse = ", ")
  # sep = "" because cat() otherwise inserts a space between arguments, which
  # indents every continuation line and leaves a space before the period.
  # The filter values quoted in the message are the ones the pipeline actually
  # matches on ('recent changes' and 'co2').
  cat(
    "Insight 2: Restricted to recent CO₂ trends (trend_type = 'recent changes', gas = 'co2'), the five countries with the most negative\n",
    "average weighted scores—indicating the steepest short-term declines—are: ", countries, ".\n",
    "These results highlight where recent emission cuts appear strongest in the short run.\n",
    sep = ""
  )
} else {
  cat("Insight 2: No rows matched the recent CO₂ filter in this dataset, so no countries could be listed.\n")
}
## Insight 2: Restricted to recent CO₂ trends (trend_type = 'recent changes', gas = 'co2'), the five countries with the most negative
## average weighted scores—indicating the steepest short-term declines—are: MAF, SPM, SXM, D07, BHR.
## These results highlight where recent emission cuts appear strongest in the short run.
Step 7. Visualization with ggplot2
# Bar chart of the mean recent-trend score for the countries held in
# co2_recent_declines; skipped with a message when that table is empty.
if (nrow(co2_recent_declines) > 0) {
  ggplot(
    co2_recent_declines,
    # reorder() sorts the bars by their score rather than alphabetically
    aes(x = reorder(iso_a3, avg_score), y = avg_score)
  ) +
    geom_col(fill = "#e75480") +
    # flip the axes so the country codes run down the vertical axis
    coord_flip() +
    labs(
      title = "Top 5 Countries with Most Concerning Recent CO2 Trend Declines",
      x = "Country (ISO_A3 code)",
      y = "Average Weighted Score"
    ) +
    theme_minimal()
} else {
  # Only one filter (recent changes, co2) feeds this plot, so say exactly that
  message("No rows available to plot for Insight 2: the recent CO2 filter matched no rows.")
}
