library(tidyverse) 
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)

Step 1. Import the data

# Load the CO2 trend scores; the column-type message is silenced
co2 <- "co2_trend_scores.csv" %>%
  read_csv(show_col_types = FALSE)

# Print the first rows to check the columns and their types
co2 %>% head()
## # A tibble: 6 × 5
##   ISO_A3 gas   trend_type score_type   score
##   <chr>  <chr> <chr>      <chr>        <dbl>
## 1 ESP    co2   Long trend Weighted    -9.51 
## 2 GBR    co2   Long trend Weighted     1.97 
## 3 PRT    co2   Long trend Weighted     0.742
## 4 BES    co2   Long trend Weighted     0    
## 5 UKR    co2   Long trend Weighted   -27.5  
## 6 ZWE    co2   Long trend Weighted    50.9

Step 2. Data preparation

# Lower-case every column name so later steps can rely on one spelling,
# then count the missing cells column by column and add them up
co2 <- setNames(co2, tolower(names(co2)))
total_na <- sum(vapply(co2, function(col) sum(is.na(col)), integer(1)))
paste("Total missing values in the dataset:", total_na, sep = " ")
## [1] "Total missing values in the dataset: 2"

Step 3. Guiding question:

# Question: Which countries (by ISO_A3) show the highest CO₂ trend scores, and which five countries are experiencing the steepest recent declines in their CO₂ emissions?

Step 4. Summarize with dplyr

# Mean score for each country, highest first.
# group_by() sorts the groups by iso_a3, so tied averages stay in
# alphabetical order after the (stable) arrange().
co2_country <- co2 %>%
  group_by(iso_a3) %>%
  summarise(avg_score = mean(score, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(desc(avg_score))

# Mean score and row count for each trend_type, highest mean first
co2_trendtype <- co2 %>%
  group_by(trend_type) %>%
  summarise(avg_score = mean(score, na.rm = TRUE), n = n()) %>%
  ungroup() %>%
  arrange(desc(avg_score))

# Ten countries with the highest mean score
co2_country %>% head(10)
## # A tibble: 10 × 2
##    iso_a3 avg_score
##    <chr>      <dbl>
##  1 MDV        100  
##  2 SMR        100  
##  3 GUM         97.6
##  4 SGP         51.9
##  5 GUY         50.8
##  6 GGY         50  
##  7 JEY         50  
##  8 NFK         50  
##  9 WLF         49.1
## 10 TCA         42.2
print(co2_trendtype)
## # A tibble: 2 × 3
##   trend_type     avg_score     n
##   <chr>              <dbl> <int>
## 1 Recent changes     0.951   253
## 2 Long trend        -5.57    253

Step 5. Key Insight 1 - Top countries with the highest long-term weighted scores

# List the distinct trend_type labels before filtering on one of them
unique(co2$trend_type)
## [1] "Long trend"     "Recent changes"
library(stringr)

# Five countries with the highest mean score among long-trend CO2 rows.
# trend_type is lower-cased and trimmed inside the pipeline only (co2 itself
# is not modified) so the comparison ignores capitalisation and padding.
co2_top5 <- co2 %>%
  mutate(trend_type = str_trim(tolower(trend_type))) %>%
  filter(trend_type == "long trend", gas == "co2") %>%
  group_by(iso_a3) %>%
  summarize(avg_score = mean(score, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_score)) %>%
  slice_head(n = 5)

# Print the insight sentence, or state explicitly that the filter matched
# no rows.
if (nrow(co2_top5) > 0) {
  countries1 <- paste(co2_top5$iso_a3, collapse = ", ")
  # sep = "" is required: cat() otherwise puts a space between every
  # argument, which yields a double space after "trends," and a stray space
  # before the closing period. The pieces already carry their own spacing.
  cat(
    "Insight 1: Looking at long-term CO₂ emission trends, ",
    "the five countries with the highest average weighted scores are: ",
    countries1, ".\n",
    sep = ""
  )
} else {
  cat("Insight 1: No rows matched the filter for long-term CO₂ emission trends in this dataset.\n")
}
## Insight 1: Looking at long-term CO₂ emission trends, the five countries with the highest average weighted scores are: MDV, NFK, SMR, GUM, WLF.

Step 7. Visualization with ggplot2

# Horizontal bar chart of avg_score per country in co2_recent_declines.
# NOTE(review): co2_recent_declines is not created in any step shown above;
# presumably it comes from the Insight 2 step - confirm it is defined before
# this chunk runs.
if (nrow(co2_recent_declines) == 0) {
  # Nothing to draw: report it instead of rendering an empty plot
  message("No rows available to plot for Insight 2 after both strict and fallback filters.")
} else {
  co2_recent_declines %>%
    ggplot(aes(x = reorder(iso_a3, avg_score), y = avg_score)) +
    geom_col(fill = "#e75480") +
    # Flip the axes so the country codes run down the vertical axis
    coord_flip() +
    labs(
      title = "Top 5 Countries with Most Concerning Recent CO2 Trend Declines",
      x = "Country (ISO_A3 code)",
      y = "Average Weighted Score"
    ) +
    theme_minimal()
}