library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggplot2)

Step 1. Import the data

# Load the CO2 trend-score table (the column-type message is suppressed)
co2 <- readr::read_csv(file = "co2_trend_scores.csv", show_col_types = FALSE)

# Peek at the first rows to confirm the column layout
co2 %>% head()

## # A tibble: 6 × 5
##   ISO_A3 gas   trend_type score_type   score
##   <chr>  <chr> <chr>      <chr>        <dbl>
## 1 ESP    co2   Long trend Weighted    -9.51 
## 2 GBR    co2   Long trend Weighted     1.97 
## 3 PRT    co2   Long trend Weighted     0.742
## 4 BES    co2   Long trend Weighted     0    
## 5 UKR    co2   Long trend Weighted   -27.5  
## 6 ZWE    co2   Long trend Weighted    50.9

Step 2. Data preparation

# Lower-case every column name so later code can rely on a single spelling,
# then report how many cells are missing across the whole table
co2 <- setNames(co2, tolower(names(co2)))
total_na <- co2 %>% is.na() %>% sum()
paste("Total missing values in the dataset:", total_na)

## [1] "Total missing values in the dataset: 2"

Step 3. Guiding question:

#Question: Which countries (by ISO_A3) show the highest CO₂ trend scores, and how do scores differ by trend type (e.g., long-term vs. recent)?

Step 4. Summarize with dplyr

# Mean score for each country code, ordered from highest to lowest.
# Missing scores are ignored when averaging.
co2_country <- co2 %>%
  group_by(iso_a3) %>%
  summarise(
    avg_score = mean(score, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_score))

# Mean score and row count for each trend type, highest mean first
co2_trendtype <- co2 %>%
  group_by(trend_type) %>%
  summarise(
    avg_score = mean(score, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_score))

# Ten highest-ranked country codes
co2_country %>% head(n = 10)

## # A tibble: 10 × 2
##    iso_a3 avg_score
##    <chr>      <dbl>
##  1 MDV        100  
##  2 SMR        100  
##  3 GUM         97.6
##  4 SGP         51.9
##  5 GUY         50.8
##  6 GGY         50  
##  7 JEY         50  
##  8 NFK         50  
##  9 WLF         49.1
## 10 TCA         42.2

# Display the per-trend-type summary (mean score and row count)
co2_trendtype

## # A tibble: 2 × 3
##   trend_type     avg_score     n
##   <chr>              <dbl> <int>
## 1 Recent changes     0.951   253
## 2 Long trend        -5.57    253

Step 5. Key Insight 1 - Top Countries with overall highest weighted scores

# Keep the five highest-ranked rows of the per-country summary
top_countries <- co2_country %>% slice_head(n = 5)
top_countries

## # A tibble: 5 × 2
##   iso_a3 avg_score
##   <chr>      <dbl>
## 1 MDV        100  
## 2 SMR        100  
## 3 GUM         97.6
## 4 SGP         51.9
## 5 GUY         50.8

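#Insight 1: These five iso_a3 codes have the highest average weighted CO₂ trend scores in the data set, where each code's scores are averaged across both trend types (long trend and recent changes).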

Step 6: Key Insight 2 - 5 countries with the most concerning recent declines in CO₂ trends

# Inspect the raw gas labels before filtering: matching is exact, so stray
# capitals or surrounding whitespace would matter
dplyr::count(co2, gas, sort = TRUE)

## # A tibble: 1 × 2
##   gas       n
##   <chr> <int>
## 1 co2     506

# Same inspection for the raw trend_type labels
dplyr::count(co2, trend_type, sort = TRUE)

## # A tibble: 2 × 2
##   trend_type         n
##   <chr>          <int>
## 1 Long trend       253
## 2 Recent changes   253

# Normalize the label columns so the later equality filters are reliable:
# first map the Unicode subscript two to a plain "2" in gas, then trim
# surrounding whitespace and lower-case all three label columns.
co2 <- co2 %>%
  dplyr::mutate(gas = gsub("\u2082", "2", gas)) %>%
  dplyr::mutate(
    dplyr::across(
      c(gas, trend_type, score_type),
      function(label) tolower(trimws(label))
    )
  )

# Re-run the gas tally to confirm the labels after normalization
dplyr::count(co2, gas, sort = TRUE)

## # A tibble: 1 × 2
##   gas       n
##   <chr> <int>
## 1 co2     506

# Re-run the trend_type tally to confirm the labels after normalization
dplyr::count(co2, trend_type, sort = TRUE)

## # A tibble: 2 × 2
##   trend_type         n
##   <chr>          <int>
## 1 long trend       253
## 2 recent changes   253

# Five country codes with the lowest mean score among the "recent changes"
# CO2 rows (sorted ascending, so the most negative averages come first)
co2_recent_declines <- co2 %>%
  dplyr::filter(gas == "co2" & trend_type == "recent changes") %>%
  dplyr::group_by(iso_a3) %>%
  dplyr::summarise(avg_score = mean(score, na.rm = TRUE), .groups = "drop") %>%
  dplyr::arrange(avg_score) %>%
  head(n = 5)

co2_recent_declines

## # A tibble: 5 × 2
##   iso_a3 avg_score
##   <chr>      <dbl>
## 1 MAF       -100  
## 2 SPM       -100  
## 3 SXM       -100  
## 4 D07        -97.6
## 5 BHR        -73.3

#Insight 2: These are the five iso_a3 codes with the lowest average score for recent changes, i.e. the ones experiencing the steepest recent declines in their CO₂ emissions, based on the World Bank’s weighted trend score.

Step 7. Visualization with ggplot2

# Horizontal bar chart of the country codes in co2_recent_declines.
# Bars are ordered by avg_score; coord_flip() puts the codes on the vertical
# axis so the labels stay readable.
if (nrow(co2_recent_declines) > 0) {
  declines_plot <- ggplot(
    co2_recent_declines,
    aes(x = reorder(iso_a3, avg_score), y = avg_score)
  ) +
    geom_col(fill = "#e75480") +
    coord_flip() +
    labs(
      # Derive the count from the data so the title stays truthful when
      # fewer than five country codes survive the filter.
      title = paste(
        "Top", nrow(co2_recent_declines),
        "Countries with Most Concerning Recent CO2 Trend Declines"
      ),
      x     = "Country (ISO_A3 code)",
      y     = "Average Weighted Score"
    ) +
    theme_minimal()

  # Print explicitly: auto-printing only happens at top level, so the plot
  # would be dropped silently if this script were run through source().
  print(declines_plot)
} else {
  # Only one filter is applied upstream (recent-change CO2 rows); say so.
  message("No rows available to plot for Insight 2 after filtering to recent CO2 changes.")
}

CO₂ Emission Trends Analysis

Dimitra Beaty

2025-09-21

Step 1. Import the data

Step 2. Data preparation

Step 3. Guiding question:

Step 4. Summarize with dplyr

Step 5. Key Insight 1 - Top Countries with overall highest weighted scores

Step 6: Key Insight 2 - 5 countries with the most concerning recent declines in CO₂ trends

Step 7. Visualization with ggplot2