library(tidyverse) 
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)

Step 1. Import the data

# Load the CO2 trend scores CSV; show_col_types = FALSE silences readr's
# column-specification message
co2 <- read_csv(file = "co2_trend_scores.csv", show_col_types = FALSE)

# Show the first six rows to check column names and types
co2 %>% head()
## # A tibble: 6 × 5
##   ISO_A3 gas   trend_type score_type   score
##   <chr>  <chr> <chr>      <chr>        <dbl>
## 1 ESP    co2   Long trend Weighted    -9.51 
## 2 GBR    co2   Long trend Weighted     1.97 
## 3 PRT    co2   Long trend Weighted     0.742
## 4 BES    co2   Long trend Weighted     0    
## 5 UKR    co2   Long trend Weighted   -27.5  
## 6 ZWE    co2   Long trend Weighted    50.9

Step 2. Data preparation

# Lower-case every column name (e.g. ISO_A3 -> iso_a3) so later steps can
# refer to columns consistently
co2 <- setNames(co2, tolower(names(co2)))

# Count NA cells across the whole table and report the total
total_na <- co2 %>% is.na() %>% sum()
paste("Total missing values in the dataset:", total_na, sep = " ")
## [1] "Total missing values in the dataset: 2"

Step 3. Guiding question:

# Question: Which countries (by ISO_A3 code) have the highest CO₂ trend scores, and how do scores differ between the two trend types (long trend vs. recent changes)?

Step 4. Summarize with dplyr

# Per-country mean of `score`, ignoring NAs, sorted highest first
co2_country <- co2 %>%
  group_by(iso_a3) %>%
  summarise(avg_score = mean(score, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(avg_score))

# Per-trend-type mean of `score` plus the row count behind each mean
co2_trendtype <- co2 %>%
  group_by(trend_type) %>%
  summarise(
    avg_score = mean(score, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_score))

# Ten highest-scoring country codes
co2_country %>% slice_head(n = 10)
## # A tibble: 10 × 2
##    iso_a3 avg_score
##    <chr>      <dbl>
##  1 MDV        100  
##  2 SMR        100  
##  3 GUM         97.6
##  4 SGP         51.9
##  5 GUY         50.8
##  6 GGY         50  
##  7 JEY         50  
##  8 NFK         50  
##  9 WLF         49.1
## 10 TCA         42.2
# Display the trend-type summary table
print(co2_trendtype)
## # A tibble: 2 × 3
##   trend_type     avg_score     n
##   <chr>              <dbl> <int>
## 1 Recent changes     0.951   253
## 2 Long trend        -5.57    253

Step 5. Key Insight 1 - Countries with the highest overall weighted scores

# Keep the five country codes with the largest average score; co2_country is
# sorted in descending order, so these are its first five rows
top_countries <- co2_country %>% slice_head(n = 5)
top_countries
## # A tibble: 5 × 2
##   iso_a3 avg_score
##   <chr>      <dbl>
## 1 MDV        100  
## 2 SMR        100  
## 3 GUM         97.6
## 4 SGP         51.9
## 5 GUY         50.8
# Insight 1: These five ISO_A3 codes (MDV, SMR, GUM, SGP, GUY) have the highest average weighted CO₂ trend scores in the dataset.

Step 7. Visualization with ggplot2

# Horizontal bar chart of average score per country for Insight 2.
# NOTE(review): co2_recent_declines is not created anywhere in the visible
# script; it presumably comes from the Insight 2 step - confirm it exists
# before this chunk runs.
if (nrow(co2_recent_declines) == 0) {
  # Nothing to draw: report it instead of rendering an empty plot
  message("No rows available to plot for Insight 2 after both strict and fallback filters.")
} else {
  ggplot(
    data = co2_recent_declines,
    mapping = aes(x = reorder(iso_a3, avg_score), y = avg_score)
  ) +
    geom_col(fill = "#e75480") +
    coord_flip() + # countries on the vertical axis, bars run horizontally
    ggtitle("Top 5 Countries with Most Concerning Recent CO2 Trend Declines") +
    xlab("Country (ISO_A3 code)") +
    ylab("Average Weighted Score") +
    theme_minimal()
}